3 "#line 1 \"libavfilter/opencl/deshake.cl\"\n" 5 " * This file is part of FFmpeg.\n" 7 " * FFmpeg is free software; you can redistribute it and/or\n" 8 " * modify it under the terms of the GNU Lesser General Public\n" 9 " * License as published by the Free Software Foundation; either\n" 10 " * version 2.1 of the License, or (at your option) any later version.\n" 12 " * FFmpeg is distributed in the hope that it will be useful,\n" 13 " * but WITHOUT ANY WARRANTY; without even the implied warranty of\n" 14 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" 15 " * Lesser General Public License for more details.\n" 17 " * You should have received a copy of the GNU Lesser General Public\n" 18 " * License along with FFmpeg; if not, write to the Free Software\n" 19 " * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" 21 " * Copyright (C) 2000, Intel Corporation, all rights reserved.\n" 22 " * Copyright (C) 2013, OpenCV Foundation, all rights reserved.\n" 23 " * Third party copyrights are property of their respective owners.\n" 25 " * Redistribution and use in source and binary forms, with or without modification,\n" 26 " * are permitted provided that the following conditions are met:\n" 28 " * * Redistribution's of source code must retain the above copyright notice,\n" 29 " * this list of conditions and the following disclaimer.\n" 31 " * * Redistribution's in binary form must reproduce the above copyright notice,\n" 32 " * this list of conditions and the following disclaimer in the documentation\n" 33 " * and/or other materials provided with the distribution.\n" 35 " * * The name of the copyright holders may not be used to endorse or promote products\n" 36 " * derived from this software without specific prior written permission.\n" 38 " * This software is provided by the copyright holders and contributors \"as is\" and\n" 39 " * any express or implied warranties, including, but not limited to, the implied\n" 40 " * warranties of merchantability and fitness for a particular purpose are disclaimed.\n" 41 " * In no event shall the Intel Corporation or contributors be liable for any direct,\n" 42 " * indirect, incidental, special, exemplary, or consequential damages\n" 43 " * (including, but not limited to, procurement of substitute goods or services;\n" 44 " * loss of use, data, or profits; or business interruption) however caused\n" 45 " * and on any theory of liability, whether in contract, strict liability,\n" 46 " * or tort (including negligence or otherwise) arising in any way out of\n" 47 " * the use of this software, even if advised of the possibility of such damage.\n" 50 "#define HARRIS_THRESHOLD 3.0f\n" 51 "// Block size over which to compute harris response\n" 53 "// Note that changing this will require fiddling with the local array sizes in\n" 54 "// harris_response\n" 55 "#define HARRIS_RADIUS 2\n" 56 "#define DISTANCE_THRESHOLD 80\n" 58 "// Sub-pixel refinement window for feature points\n" 59 "#define REFINE_WIN_HALF_W 5\n" 60 "#define REFINE_WIN_HALF_H 5\n" 61 "#define REFINE_WIN_W 11 // REFINE_WIN_HALF_W * 2 + 1\n" 62 "#define REFINE_WIN_H 11\n" 64 "// Non-maximum suppression window size\n" 65 "#define NONMAX_WIN 30\n" 66 "#define NONMAX_WIN_HALF 15 // NONMAX_WIN / 2\n" 68 "typedef struct PointPair {\n" 69 " // Previous frame\n" 75 "typedef struct SmoothedPointPair {\n" 76 " // Non-smoothed point in current frame\n" 78 " // Smoothed point in current frame\n" 80 "} SmoothedPointPair;\n" 82 "typedef struct MotionVector {\n" 84 " // Used to mark vectors as potential outliers\n" 85 " int should_consider;\n" 88 "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n" 89 " CLK_ADDRESS_CLAMP_TO_EDGE |\n" 90 " CLK_FILTER_NEAREST;\n" 92 "const sampler_t sampler_linear = CLK_NORMALIZED_COORDS_FALSE |\n" 93 " CLK_ADDRESS_CLAMP_TO_EDGE |\n" 94 " CLK_FILTER_LINEAR;\n" 96 "const sampler_t sampler_linear_mirror = CLK_NORMALIZED_COORDS_TRUE |\n" 97 " CLK_ADDRESS_MIRRORED_REPEAT |\n" 98 " CLK_FILTER_LINEAR;\n" 100 "// Writes to a 1D array at loc, treating it as a 2D array with the same\n" 101 "// dimensions as the global work size.\n" 102 "static void write_to_1d_arrf(__global float *buf, int2 loc, float val) {\n" 103 " buf[loc.x + loc.y * get_global_size(0)] = val;\n" 106 "static void write_to_1d_arrul8(__global ulong8 *buf, int2 loc, ulong8 val) {\n" 107 " buf[loc.x + loc.y * get_global_size(0)] = val;\n" 110 "static void write_to_1d_arrvec(__global MotionVector *buf, int2 loc, MotionVector val) {\n" 111 " buf[loc.x + loc.y * get_global_size(0)] = val;\n" 114 "static void write_to_1d_arrf2(__global float2 *buf, int2 loc, float2 val) {\n" 115 " buf[loc.x + loc.y * get_global_size(0)] = val;\n" 118 "static ulong8 read_from_1d_arrul8(__global const ulong8 *buf, int2 loc) {\n" 119 " return buf[loc.x + loc.y * get_global_size(0)];\n" 122 "static float2 read_from_1d_arrf2(__global const float2 *buf, int2 loc) {\n" 123 " return buf[loc.x + loc.y * get_global_size(0)];\n" 126 "// Returns the grayscale value at the given point.\n" 127 "static float pixel_grayscale(__read_only image2d_t src, int2 loc) {\n" 128 " float4 pixel = read_imagef(src, sampler, loc);\n" 129 " return (pixel.x + pixel.y + pixel.z) / 3.0f;\n" 132 "static float convolve(\n" 133 " __local const float *grayscale,\n" 134 " int local_idx_x,\n" 135 " int local_idx_y,\n" 136 " float mask[3][3]\n" 140 " // These loops touch each pixel surrounding loc as well as loc itself\n" 141 " for (int i = 1, i2 = 0; i >= -1; --i, ++i2) {\n" 142 " for (int j = -1, j2 = 0; j <= 1; ++j, ++j2) {\n" 143 " ret += mask[i2][j2] * grayscale[(local_idx_x + 3 + j) + (local_idx_y + 3 + i) * 14];\n" 150 "// Sums dx * dy for all pixels within radius of loc\n" 151 "static float sum_deriv_prod(\n" 152 " __local const float *grayscale,\n" 153 " float mask_x[3][3],\n" 154 " float mask_y[3][3]\n" 158 " for (int i = HARRIS_RADIUS; i >= -HARRIS_RADIUS; --i) {\n" 159 " for (int j = -HARRIS_RADIUS; j <= HARRIS_RADIUS; ++j) {\n" 160 " ret += convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask_x) *\n" 161 " convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask_y);\n" 168 "// Sums d<>^2 (determined by mask) for all pixels within radius of loc\n" 169 "static float sum_deriv_pow(__local const float *grayscale, float mask[3][3])\n" 173 " for (int i = HARRIS_RADIUS; i >= -HARRIS_RADIUS; --i) {\n" 174 " for (int j = -HARRIS_RADIUS; j <= HARRIS_RADIUS; ++j) {\n" 175 " float deriv = convolve(grayscale, get_local_id(0) + j, get_local_id(1) + i, mask);\n" 176 " ret += deriv * deriv;\n" 183 "// Fills a box with the given radius and pixel around loc\n" 184 "static void draw_box(__write_only image2d_t dst, int2 loc, float4 pixel, int radius)\n" 186 " for (int i = -radius; i <= radius; ++i) {\n" 187 " for (int j = -radius; j <= radius; ++j) {\n" 191 " // Clamp to avoid writing outside image bounds\n" 192 " clamp(loc.x + i, 0, get_image_dim(dst).x - 1),\n" 193 " clamp(loc.y + j, 0, get_image_dim(dst).y - 1)\n" 201 "// Converts the src image to grayscale\n" 202 "__kernel void grayscale(\n" 203 " __read_only image2d_t src,\n" 204 " __write_only image2d_t grayscale\n" 206 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n" 207 " write_imagef(grayscale, loc, (float4)(pixel_grayscale(src, loc), 0.0f, 0.0f, 1.0f));\n" 210 "// This kernel computes the harris response for the given grayscale src image\n" 211 "// within the given radius and writes it to harris_buf\n" 212 "__kernel void harris_response(\n" 213 " __read_only image2d_t grayscale,\n" 214 " __global float *harris_buf\n" 216 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n" 218 " if (loc.x > get_image_width(grayscale) - 1 || loc.y > get_image_height(grayscale) - 1) {\n" 219 " write_to_1d_arrf(harris_buf, loc, 0);\n" 223 " float scale = 1.0f / ((1 << 2) * HARRIS_RADIUS * 255.0f);\n" 225 " float sobel_mask_x[3][3] = {\n" 231 " float sobel_mask_y[3][3] = {\n" 237 " // 8 x 8 local work + 3 pixels around each side (needed to accomodate for the\n" 238 " // block size radius of 2)\n" 239 " __local float grayscale_data[196];\n" 241 " int idx = get_group_id(0) * get_local_size(0);\n" 242 " int idy = get_group_id(1) * get_local_size(1);\n" 244 " for (int i = idy - 3, it = 0; i < idy + (int)get_local_size(1) + 3; i++, it++) {\n" 245 " for (int j = idx - 3, jt = 0; j < idx + (int)get_local_size(0) + 3; j++, jt++) {\n" 246 " grayscale_data[jt + it * 14] = read_imagef(grayscale, sampler, (int2)(j, i)).x;\n" 250 " barrier(CLK_LOCAL_MEM_FENCE);\n" 252 " float sumdxdy = sum_deriv_prod(grayscale_data, sobel_mask_x, sobel_mask_y);\n" 253 " float sumdx2 = sum_deriv_pow(grayscale_data, sobel_mask_x);\n" 254 " float sumdy2 = sum_deriv_pow(grayscale_data, sobel_mask_y);\n" 256 " float trace = sumdx2 + sumdy2;\n" 257 " // r = det(M) - k(trace(M))^2\n" 258 " // k usually between 0.04 to 0.06\n" 259 " float r = (sumdx2 * sumdy2 - sumdxdy * sumdxdy) - 0.04f * (trace * trace) * pown(scale, 4);\n" 261 " // Threshold the r value\n" 262 " harris_buf[loc.x + loc.y * get_image_width(grayscale)] = r * step(HARRIS_THRESHOLD, r);\n" 265 "// Gets a patch centered around a float coordinate from a grayscale image using\n" 266 "// bilinear interpolation\n" 267 "static void get_rect_sub_pix(\n" 268 " __read_only image2d_t grayscale,\n" 274 " float2 offset = ((float2)(size_x, size_y) - 1.0f) * 0.5f;\n" 276 " for (int i = 0; i < size_y; i++) {\n" 277 " for (int j = 0; j < size_x; j++) {\n" 278 " buffer[i * size_x + j] = read_imagef(\n" 281 " (float2)(j, i) + center - offset\n" 287 "// Refines detected features at a sub-pixel level\n" 289 "// This function is ported from OpenCV\n" 290 "static float2 corner_sub_pix(\n" 291 " __read_only image2d_t grayscale,\n" 295 " float2 init = feature;\n" 296 " int src_width = get_global_size(0);\n" 297 " int src_height = get_global_size(1);\n" 299 " const int max_iters = 40;\n" 300 " const float eps = 0.001f * 0.001f;\n" 305 " float subpix[(REFINE_WIN_W + 2) * (REFINE_WIN_H + 2)];\n" 306 " const float flt_epsilon = 0x1.0p-23f;\n" 309 " float2 feature_tmp;\n" 310 " float a = 0, b = 0, c = 0, bb1 = 0, bb2 = 0;\n" 312 " get_rect_sub_pix(grayscale, subpix, REFINE_WIN_W + 2, REFINE_WIN_H + 2, feature);\n" 313 " float *subpix_ptr = subpix;\n" 314 " subpix_ptr += REFINE_WIN_W + 2 + 1;\n" 316 " // process gradient\n" 317 " for (i = 0, k = 0; i < REFINE_WIN_H; i++, subpix_ptr += REFINE_WIN_W + 2) {\n" 318 " float py = i - REFINE_WIN_HALF_H;\n" 320 " for (j = 0; j < REFINE_WIN_W; j++, k++) {\n" 321 " float m = mask[k];\n" 322 " float tgx = subpix_ptr[j + 1] - subpix_ptr[j - 1];\n" 323 " float tgy = subpix_ptr[j + REFINE_WIN_W + 2] - subpix_ptr[j - REFINE_WIN_W - 2];\n" 324 " float gxx = tgx * tgx * m;\n" 325 " float gxy = tgx * tgy * m;\n" 326 " float gyy = tgy * tgy * m;\n" 327 " float px = j - REFINE_WIN_HALF_W;\n" 333 " bb1 += gxx * px + gxy * py;\n" 334 " bb2 += gxy * px + gyy * py;\n" 338 " float det = a * c - b * b;\n" 339 " if (fabs(det) <= flt_epsilon * flt_epsilon) {\n" 343 " // 2x2 matrix inversion\n" 344 " float scale = 1.0f / det;\n" 345 " feature_tmp.x = (float)(feature.x + (c * scale * bb1) - (b * scale * bb2));\n" 346 " feature_tmp.y = (float)(feature.y - (b * scale * bb1) + (a * scale * bb2));\n" 347 " err = dot(feature_tmp - feature, feature_tmp - feature);\n" 349 " feature = feature_tmp;\n" 350 " if (feature.x < 0 || feature.x >= src_width || feature.y < 0 || feature.y >= src_height) {\n" 353 " } while (++iter < max_iters && err > eps);\n" 355 " // Make sure new point isn't too far from the initial point (indicates poor convergence)\n" 356 " if (fabs(feature.x - init.x) > REFINE_WIN_HALF_W || fabs(feature.y - init.y) > REFINE_WIN_HALF_H) {\n" 363 "// Performs non-maximum suppression on the harris response and writes the resulting\n" 364 "// feature locations to refined_features.\n" 366 "// Assumes that refined_features and the global work sizes are set up such that the image\n" 367 "// is split up into a grid of 32x32 blocks where each block has a single slot in the\n" 368 "// refined_features buffer. This kernel finds the best corner in each block (if the\n" 369 "// block has any) and writes it to the corresponding slot in the buffer.\n" 371 "// If subpixel_refine is true, the features are additionally refined at a sub-pixel\n" 372 "// level for increased precision.\n" 373 "__kernel void refine_features(\n" 374 " __read_only image2d_t grayscale,\n" 375 " __global const float *harris_buf,\n" 376 " __global float2 *refined_features,\n" 377 " int subpixel_refine\n" 379 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n" 380 " // The location in the grayscale buffer rather than the compacted grid\n" 381 " int2 loc_i = (int2)(loc.x * 32, loc.y * 32);\n" 384 " float max_val = 0;\n" 385 " float2 loc_max = (float2)(-1, -1);\n" 387 " int end_x = min(loc_i.x + 32, (int)get_image_dim(grayscale).x - 1);\n" 388 " int end_y = min(loc_i.y + 32, (int)get_image_dim(grayscale).y - 1);\n" 390 " for (int i = loc_i.x; i < end_x; ++i) {\n" 391 " for (int j = loc_i.y; j < end_y; ++j) {\n" 392 " new_val = harris_buf[i + j * get_image_dim(grayscale).x];\n" 394 " if (new_val > max_val) {\n" 395 " max_val = new_val;\n" 396 " loc_max = (float2)(i, j);\n" 401 " if (max_val == 0) {\n" 402 " // There are no features in this part of the frame\n" 403 " write_to_1d_arrf2(refined_features, loc, loc_max);\n" 407 " if (subpixel_refine) {\n" 408 " float mask[REFINE_WIN_H * REFINE_WIN_W];\n" 409 " for (int i = 0; i < REFINE_WIN_H; i++) {\n" 410 " float y = (float)(i - REFINE_WIN_HALF_H) / REFINE_WIN_HALF_H;\n" 411 " float vy = exp(-y * y);\n" 413 " for (int j = 0; j < REFINE_WIN_W; j++) {\n" 414 " float x = (float)(j - REFINE_WIN_HALF_W) / REFINE_WIN_HALF_W;\n" 415 " mask[i * REFINE_WIN_W + j] = (float)(vy * exp(-x * x));\n" 419 " loc_max = corner_sub_pix(grayscale, loc_max, mask);\n" 422 " write_to_1d_arrf2(refined_features, loc, loc_max);\n" 425 "// Extracts BRIEF descriptors from the grayscale src image for the given features\n" 426 "// using the provided sampler.\n" 427 "__kernel void brief_descriptors(\n" 428 " __read_only image2d_t grayscale,\n" 429 " __global const float2 *refined_features,\n" 430 " // for 512 bit descriptors\n" 431 " __global ulong8 *desc_buf,\n" 432 " __global const PointPair *brief_pattern\n" 434 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n" 435 " float2 feature = read_from_1d_arrf2(refined_features, loc);\n" 437 " // There was no feature in this part of the frame\n" 438 " if (feature.x == -1) {\n" 439 " write_to_1d_arrul8(desc_buf, loc, (ulong8)(0));\n" 443 " ulong8 desc = 0;\n" 444 " ulong *p = &desc;\n" 446 " for (int i = 0; i < 8; ++i) {\n" 447 " for (int j = 0; j < 64; ++j) {\n" 448 " PointPair pair = brief_pattern[j * (i + 1)];\n" 449 " float l1 = read_imagef(grayscale, sampler_linear, feature + pair.p1).x;\n" 450 " float l2 = read_imagef(grayscale, sampler_linear, feature + pair.p2).x;\n" 453 " p[i] |= 1UL << j;\n" 458 " write_to_1d_arrul8(desc_buf, loc, desc);\n" 461 "// Given buffers with descriptors for the current and previous frame, determines\n" 462 "// which ones match, writing correspondences to matches_buf.\n" 464 "// Feature and descriptor buffers are assumed to be compacted (each element sourced\n" 465 "// from a 32x32 block in the frame being processed).\n" 466 "__kernel void match_descriptors(\n" 467 " __global const float2 *prev_refined_features,\n" 468 " __global const float2 *refined_features,\n" 469 " __global const ulong8 *desc_buf,\n" 470 " __global const ulong8 *prev_desc_buf,\n" 471 " __global MotionVector *matches_buf\n" 473 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n" 474 " ulong8 desc = read_from_1d_arrul8(desc_buf, loc);\n" 475 " const int search_radius = 3;\n" 477 " MotionVector invalid_vector = (MotionVector) {\n" 479 " (float2)(-1, -1),\n" 480 " (float2)(-1, -1)\n" 485 " if (desc.s0 == 0 && desc.s1 == 0) {\n" 486 " // There was no feature in this part of the frame\n" 487 " write_to_1d_arrvec(\n" 495 " int2 start = max(loc - search_radius, 0);\n" 496 " int2 end = min(loc + search_radius, (int2)(get_global_size(0) - 1, get_global_size(1) - 1));\n" 498 " for (int i = start.x; i < end.x; ++i) {\n" 499 " for (int j = start.y; j < end.y; ++j) {\n" 500 " int2 prev_point = (int2)(i, j);\n" 501 " int total_dist = 0;\n" 503 " ulong8 prev_desc = read_from_1d_arrul8(prev_desc_buf, prev_point);\n" 505 " if (prev_desc.s0 == 0 && prev_desc.s1 == 0) {\n" 509 " ulong *prev_desc_p = &prev_desc;\n" 510 " ulong *desc_p = &desc;\n" 512 " for (int i = 0; i < 8; i++) {\n" 513 " total_dist += popcount(desc_p[i] ^ prev_desc_p[i]);\n" 516 " if (total_dist < DISTANCE_THRESHOLD) {\n" 517 " write_to_1d_arrvec(\n" 520 " (MotionVector) {\n" 522 " read_from_1d_arrf2(prev_refined_features, prev_point),\n" 523 " read_from_1d_arrf2(refined_features, loc)\n" 534 " // There is no found match for this point\n" 535 " write_to_1d_arrvec(\n" 542 "// Returns the position of the given point after the transform is applied\n" 543 "static float2 transformed_point(float2 p, __global const float *transform) {\n" 546 " ret.x = p.x * transform[0] + p.y * transform[1] + transform[2];\n" 547 " ret.y = p.x * transform[3] + p.y * transform[4] + transform[5];\n" 553 "// Performs the given transform on the src image\n" 554 "__kernel void transform(\n" 555 " __read_only image2d_t src,\n" 556 " __write_only image2d_t dst,\n" 557 " __global const float *transform\n" 559 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n" 560 " float2 norm = convert_float2(get_image_dim(src));\n" 567 " sampler_linear_mirror,\n" 568 " transformed_point((float2)(loc.x, loc.y), transform) / norm\n" 573 "// Returns the new location of the given point using the given crop bounding box\n" 574 "// and the width and height of the original frame.\n" 575 "static float2 cropped_point(\n" 577 " float2 top_left,\n" 578 " float2 bottom_right,\n" 583 " float crop_width = bottom_right.x - top_left.x;\n" 584 " float crop_height = bottom_right.y - top_left.y;\n" 586 " float width_norm = p.x / (float)orig_dim.x;\n" 587 " float height_norm = p.y / (float)orig_dim.y;\n" 589 " ret.x = (width_norm * crop_width) + top_left.x;\n" 590 " ret.y = (height_norm * crop_height) + ((float)orig_dim.y - bottom_right.y);\n" 595 "// Upscales the given cropped region to the size of the original frame\n" 596 "__kernel void crop_upscale(\n" 597 " __read_only image2d_t src,\n" 598 " __write_only image2d_t dst,\n" 599 " float2 top_left,\n" 600 " float2 bottom_right\n" 602 " int2 loc = (int2)(get_global_id(0), get_global_id(1));\n" 610 " cropped_point((float2)(loc.x, loc.y), top_left, bottom_right, get_image_dim(dst))\n" 615 "// Draws boxes to represent the given point matches and uses the given transform\n" 616 "// and crop info to make sure their positions are accurate on the transformed frame.\n" 618 "// model_matches is an array of three points that were used by the RANSAC process\n" 619 "// to generate the given transform\n" 620 "__kernel void draw_debug_info(\n" 621 " __write_only image2d_t dst,\n" 622 " __global const MotionVector *matches,\n" 623 " __global const MotionVector *model_matches,\n" 624 " int num_model_matches,\n" 625 " __global const float *transform\n" 627 " int loc = get_global_id(0);\n" 628 " MotionVector vec = matches[loc];\n" 629 " // Black box: matched point that RANSAC considered an outlier\n" 630 " float4 big_rect_color = (float4)(0.1f, 0.1f, 0.1f, 1.0f);\n" 632 " if (vec.should_consider) {\n" 633 " // Green box: matched point that RANSAC considered an inlier\n" 634 " big_rect_color = (float4)(0.0f, 1.0f, 0.0f, 1.0f);\n" 637 " for (int i = 0; i < num_model_matches; i++) {\n" 638 " if (vec.p.p2.x == model_matches[i].p.p2.x && vec.p.p2.y == model_matches[i].p.p2.y) {\n" 639 " // Orange box: point used to calculate model\n" 640 " big_rect_color = (float4)(1.0f, 0.5f, 0.0f, 1.0f);\n" 644 " float2 transformed_p1 = transformed_point(vec.p.p1, transform);\n" 645 " float2 transformed_p2 = transformed_point(vec.p.p2, transform);\n" 647 " draw_box(dst, (int2)(transformed_p2.x, transformed_p2.y), big_rect_color, 5);\n" 648 " // Small light blue box: the point in the previous frame\n" 649 " draw_box(dst, (int2)(transformed_p1.x, transformed_p1.y), (float4)(0.0f, 0.3f, 0.7f, 1.0f), 3);\n" const char * ff_opencl_source_deshake