1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// Intel License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2000, Intel Corporation, all rights reserved.
14// Third party copyrights are property of their respective owners.
15//
16// Redistribution and use in source and binary forms, with or without modification,
17// are permitted provided that the following conditions are met:
18//
19// * Redistribution's of source code must retain the above copyright notice,
20// this list of conditions and the following disclaimer.
21//
22// * Redistribution's in binary form must reproduce the above copyright notice,
23// this list of conditions and the following disclaimer in the documentation
24// and/or other materials provided with the distribution.
25//
26// * The name of Intel Corporation may not be used to endorse or promote products
27// derived from this software without specific prior written permission.
28//
29// This software is provided by the copyright holders and contributors "as is" and
30// any express or implied warranties, including, but not limited to, the implied
31// warranties of merchantability and fitness for a particular purpose are disclaimed.
32// In no event shall the Intel Corporation or contributors be liable for any direct,
33// indirect, incidental, special, exemplary, or consequential damages
34// (including, but not limited to, procurement of substitute goods or services;
35// loss of use, data, or profits; or business interruption) however caused
36// and on any theory of liability, whether in contract, strict liability,
37// or tort (including negligence or otherwise) arising in any way out of
38// the use of this software, even if advised of the possibility of such damage.
39//
40//M*/
41
42#include "precomp.hpp"
43#include "opencv2/core/opencl/runtime/opencl_clfft.hpp"
44#include "opencv2/core/opencl/runtime/opencl_core.hpp"
45#include "opencl_kernels_core.hpp"
46#include <map>
47
48namespace cv
49{
50
51// On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010)
52#if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600
53# pragma optimize("", off)
54# pragma warning(disable: 4748)
55#endif
56
57#if IPP_VERSION_X100 >= 710
58#define USE_IPP_DFT 1
59#else
60#undef USE_IPP_DFT
61#endif
62
63/****************************************************************************************\
64 Discrete Fourier Transform
65\****************************************************************************************/
66
67static unsigned char bitrevTab[] =
68{
69 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
70 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8,
71 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4,
72 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc,
73 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2,
74 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa,
75 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,
76 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,
77 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,
78 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,
79 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,
80 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,
81 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,
82 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,
83 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
84 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
85};
86
87static const double DFTTab[][2] =
88{
89{ 1.00000000000000000, 0.00000000000000000 },
90{-1.00000000000000000, 0.00000000000000000 },
91{ 0.00000000000000000, 1.00000000000000000 },
92{ 0.70710678118654757, 0.70710678118654746 },
93{ 0.92387953251128674, 0.38268343236508978 },
94{ 0.98078528040323043, 0.19509032201612825 },
95{ 0.99518472667219693, 0.09801714032956060 },
96{ 0.99879545620517241, 0.04906767432741802 },
97{ 0.99969881869620425, 0.02454122852291229 },
98{ 0.99992470183914450, 0.01227153828571993 },
99{ 0.99998117528260111, 0.00613588464915448 },
100{ 0.99999529380957619, 0.00306795676296598 },
101{ 0.99999882345170188, 0.00153398018628477 },
102{ 0.99999970586288223, 0.00076699031874270 },
103{ 0.99999992646571789, 0.00038349518757140 },
104{ 0.99999998161642933, 0.00019174759731070 },
105{ 0.99999999540410733, 0.00009587379909598 },
106{ 0.99999999885102686, 0.00004793689960307 },
107{ 0.99999999971275666, 0.00002396844980842 },
108{ 0.99999999992818922, 0.00001198422490507 },
109{ 0.99999999998204725, 0.00000599211245264 },
110{ 0.99999999999551181, 0.00000299605622633 },
111{ 0.99999999999887801, 0.00000149802811317 },
112{ 0.99999999999971945, 0.00000074901405658 },
113{ 0.99999999999992983, 0.00000037450702829 },
114{ 0.99999999999998246, 0.00000018725351415 },
115{ 0.99999999999999567, 0.00000009362675707 },
116{ 0.99999999999999889, 0.00000004681337854 },
117{ 0.99999999999999978, 0.00000002340668927 },
118{ 0.99999999999999989, 0.00000001170334463 },
119{ 1.00000000000000000, 0.00000000585167232 },
120{ 1.00000000000000000, 0.00000000292583616 }
121};
122
123namespace {
124template <typename T>
125struct Constants {
126 static const T sin_120;
127 static const T fft5_2;
128 static const T fft5_3;
129 static const T fft5_4;
130 static const T fft5_5;
131};
132
133template <typename T>
134const T Constants<T>::sin_120 = (T)0.86602540378443864676372317075294;
135
136template <typename T>
137const T Constants<T>::fft5_2 = (T)0.559016994374947424102293417182819;
138
139template <typename T>
140const T Constants<T>::fft5_3 = (T)-0.951056516295153572116439333379382;
141
142template <typename T>
143const T Constants<T>::fft5_4 = (T)-1.538841768587626701285145288018455;
144
145template <typename T>
146const T Constants<T>::fft5_5 = (T)0.363271264002680442947733378740309;
147
148} //namespace
149
150#define BitRev(i,shift) \
151 ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \
152 ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \
153 ((unsigned)bitrevTab[((i)>>16)&255] << 8)+ \
154 ((unsigned)bitrevTab[((i)>>24)])) >> (shift)))
155
156static int
157DFTFactorize( int n, int* factors )
158{
159 int nf = 0, f, i, j;
160
161 if( n <= 5 )
162 {
163 factors[0] = n;
164 return 1;
165 }
166
167 f = (((n - 1)^n)+1) >> 1;
168 if( f > 1 )
169 {
170 factors[nf++] = f;
171 n = f == n ? 1 : n/f;
172 }
173
174 for( f = 3; n > 1; )
175 {
176 int d = n/f;
177 if( d*f == n )
178 {
179 factors[nf++] = f;
180 n = d;
181 }
182 else
183 {
184 f += 2;
185 if( f*f > n )
186 break;
187 }
188 }
189
190 if( n > 1 )
191 factors[nf++] = n;
192
193 f = (factors[0] & 1) == 0;
194 for( i = f; i < (nf+f)/2; i++ )
195 CV_SWAP( factors[i], factors[nf-i-1+f], j );
196
197 return nf;
198}
199
200static void
201DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
202{
203 int digits[34], radix[34];
204 int n = factors[0], m = 0;
205 int* itab0 = itab;
206 int i, j, k;
207 Complex<double> w, w1;
208 double t;
209
210 if( n0 <= 5 )
211 {
212 itab[0] = 0;
213 itab[n0-1] = n0-1;
214
215 if( n0 != 4 )
216 {
217 for( i = 1; i < n0-1; i++ )
218 itab[i] = i;
219 }
220 else
221 {
222 itab[1] = 2;
223 itab[2] = 1;
224 }
225 if( n0 == 5 )
226 {
227 if( elem_size == sizeof(Complex<double>) )
228 ((Complex<double>*)_wave)[0] = Complex<double>(1.,0.);
229 else
230 ((Complex<float>*)_wave)[0] = Complex<float>(1.f,0.f);
231 }
232 if( n0 != 4 )
233 return;
234 m = 2;
235 }
236 else
237 {
238 // radix[] is initialized from index 'nf' down to zero
239 CV_Assert (nf < 34);
240 radix[nf] = 1;
241 digits[nf] = 0;
242 for( i = 0; i < nf; i++ )
243 {
244 digits[i] = 0;
245 radix[nf-i-1] = radix[nf-i]*factors[nf-i-1];
246 }
247
248 if( inv_itab && factors[0] != factors[nf-1] )
249 itab = (int*)_wave;
250
251 if( (n & 1) == 0 )
252 {
253 int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1;
254 for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
255 ;
256 if( n <= 2 )
257 {
258 itab[0] = 0;
259 itab[1] = na2;
260 }
261 else if( n <= 256 )
262 {
263 int shift = 10 - m;
264 for( i = 0; i <= n - 4; i += 4 )
265 {
266 j = (bitrevTab[i>>2]>>shift)*a;
267 itab[i] = j;
268 itab[i+1] = j + na2;
269 itab[i+2] = j + na4;
270 itab[i+3] = j + na2 + na4;
271 }
272 }
273 else
274 {
275 int shift = 34 - m;
276 for( i = 0; i < n; i += 4 )
277 {
278 int i4 = i >> 2;
279 j = BitRev(i4,shift)*a;
280 itab[i] = j;
281 itab[i+1] = j + na2;
282 itab[i+2] = j + na4;
283 itab[i+3] = j + na2 + na4;
284 }
285 }
286
287 digits[1]++;
288
289 if( nf >= 2 )
290 {
291 for( i = n, j = radix[2]; i < n0; )
292 {
293 for( k = 0; k < n; k++ )
294 itab[i+k] = itab[k] + j;
295 if( (i += n) >= n0 )
296 break;
297 j += radix[2];
298 for( k = 1; ++digits[k] >= factors[k]; k++ )
299 {
300 digits[k] = 0;
301 j += radix[k+2] - radix[k];
302 }
303 }
304 }
305 }
306 else
307 {
308 for( i = 0, j = 0;; )
309 {
310 itab[i] = j;
311 if( ++i >= n0 )
312 break;
313 j += radix[1];
314 for( k = 0; ++digits[k] >= factors[k]; k++ )
315 {
316 digits[k] = 0;
317 j += radix[k+2] - radix[k];
318 }
319 }
320 }
321
322 if( itab != itab0 )
323 {
324 itab0[0] = 0;
325 for( i = n0 & 1; i < n0; i += 2 )
326 {
327 int k0 = itab[i];
328 int k1 = itab[i+1];
329 itab0[k0] = i;
330 itab0[k1] = i+1;
331 }
332 }
333 }
334
335 if( (n0 & (n0-1)) == 0 )
336 {
337 w.re = w1.re = DFTTab[m][0];
338 w.im = w1.im = -DFTTab[m][1];
339 }
340 else
341 {
342 t = -CV_PI*2/n0;
343 w.im = w1.im = sin(x: t);
344 w.re = w1.re = std::sqrt(x: 1. - w1.im*w1.im);
345 }
346 n = (n0+1)/2;
347
348 if( elem_size == sizeof(Complex<double>) )
349 {
350 Complex<double>* wave = (Complex<double>*)_wave;
351
352 wave[0].re = 1.;
353 wave[0].im = 0.;
354
355 if( (n0 & 1) == 0 )
356 {
357 wave[n].re = -1.;
358 wave[n].im = 0;
359 }
360
361 for( i = 1; i < n; i++ )
362 {
363 wave[i] = w;
364 wave[n0-i].re = w.re;
365 wave[n0-i].im = -w.im;
366
367 t = w.re*w1.re - w.im*w1.im;
368 w.im = w.re*w1.im + w.im*w1.re;
369 w.re = t;
370 }
371 }
372 else
373 {
374 Complex<float>* wave = (Complex<float>*)_wave;
375 CV_Assert( elem_size == sizeof(Complex<float>) );
376
377 wave[0].re = 1.f;
378 wave[0].im = 0.f;
379
380 if( (n0 & 1) == 0 )
381 {
382 wave[n].re = -1.f;
383 wave[n].im = 0.f;
384 }
385
386 for( i = 1; i < n; i++ )
387 {
388 wave[i].re = (float)w.re;
389 wave[i].im = (float)w.im;
390 wave[n0-i].re = (float)w.re;
391 wave[n0-i].im = (float)-w.im;
392
393 t = w.re*w1.re - w.im*w1.im;
394 w.im = w.re*w1.im + w.im*w1.re;
395 w.re = t;
396 }
397 }
398}
399
400// Reference radix-2 implementation.
401template<typename T> struct DFT_R2
402{
403 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
404 const int nx = n/2;
405 for(int i = 0 ; i < c_n; i += n)
406 {
407 Complex<T>* v = dst + i;
408 T r0 = v[0].re + v[nx].re;
409 T i0 = v[0].im + v[nx].im;
410 T r1 = v[0].re - v[nx].re;
411 T i1 = v[0].im - v[nx].im;
412 v[0].re = r0; v[0].im = i0;
413 v[nx].re = r1; v[nx].im = i1;
414
415 for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
416 {
417 v = dst + i + j;
418 r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
419 i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
420 r0 = v[0].re; i0 = v[0].im;
421
422 v[0].re = r0 + r1; v[0].im = i0 + i1;
423 v[nx].re = r0 - r1; v[nx].im = i0 - i1;
424 }
425 }
426 }
427};
428
429// Reference radix-3 implementation.
430template<typename T> struct DFT_R3
431{
432 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
433 const int nx = n / 3;
434 for(int i = 0; i < c_n; i += n )
435 {
436 {
437 Complex<T>* v = dst + i;
438 T r1 = v[nx].re + v[nx*2].re;
439 T i1 = v[nx].im + v[nx*2].im;
440 T r0 = v[0].re;
441 T i0 = v[0].im;
442 T r2 = Constants<T>::sin_120*(v[nx].im - v[nx*2].im);
443 T i2 = Constants<T>::sin_120*(v[nx*2].re - v[nx].re);
444 v[0].re = r0 + r1; v[0].im = i0 + i1;
445 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
446 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
447 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
448 }
449
450 for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
451 {
452 Complex<T>* v = dst + i + j;
453 T r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
454 T i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
455 T i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
456 T r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
457 T r1 = r0 + i2; T i1 = i0 + r2;
458
459 r2 = Constants<T>::sin_120*(i0 - r2); i2 = Constants<T>::sin_120*(i2 - r0);
460 r0 = v[0].re; i0 = v[0].im;
461 v[0].re = r0 + r1; v[0].im = i0 + i1;
462 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
463 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
464 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
465 }
466 }
467 }
468};
469
470// Reference radix-5 implementation.
471template<typename T> struct DFT_R5
472{
473 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
474 const int nx = n / 5;
475 for(int i = 0; i < c_n; i += n )
476 {
477 for(int j = 0, dw = 0; j < nx; j++, dw += dw0 )
478 {
479 Complex<T>* v0 = dst + i + j;
480 Complex<T>* v1 = v0 + nx*2;
481 Complex<T>* v2 = v1 + nx*2;
482
483 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
484
485 r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
486 i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
487 r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
488 i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
489
490 r1 = r3 + r2; i1 = i3 + i2;
491 r3 -= r2; i3 -= i2;
492
493 r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
494 i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
495 r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
496 i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
497
498 r2 = r4 + r0; i2 = i4 + i0;
499 r4 -= r0; i4 -= i0;
500
501 r0 = v0[0].re; i0 = v0[0].im;
502 r5 = r1 + r2; i5 = i1 + i2;
503
504 v0[0].re = r0 + r5; v0[0].im = i0 + i5;
505
506 r0 -= (T)0.25*r5; i0 -= (T)0.25*i5;
507 r1 = Constants<T>::fft5_2*(r1 - r2); i1 = Constants<T>::fft5_2*(i1 - i2);
508 r2 = -Constants<T>::fft5_3*(i3 + i4); i2 = Constants<T>::fft5_3*(r3 + r4);
509
510 i3 *= -Constants<T>::fft5_5; r3 *= Constants<T>::fft5_5;
511 i4 *= -Constants<T>::fft5_4; r4 *= Constants<T>::fft5_4;
512
513 r5 = r2 + i3; i5 = i2 + r3;
514 r2 -= i4; i2 -= r4;
515
516 r3 = r0 + r1; i3 = i0 + i1;
517 r0 -= r1; i0 -= i1;
518
519 v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
520 v2[0].re = r3 - r2; v2[0].im = i3 - i2;
521
522 v1[0].re = r0 + r5; v1[0].im = i0 + i5;
523 v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
524 }
525 }
526 }
527};
528
529template<typename T> struct DFT_VecR2
530{
531 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
532 DFT_R2<T>()(dst, c_n, n, dw0, wave);
533 }
534};
535
536template<typename T> struct DFT_VecR3
537{
538 void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
539 DFT_R3<T>()(dst, c_n, n, dw0, wave);
540 }
541};
542
543template<typename T> struct DFT_VecR4
544{
545 int operator()(Complex<T>*, int, int, int&, const Complex<T>*) const { return 1; }
546};
547
548#if CV_SSE3
549
550// multiplies *a and *b:
551// r_re + i*r_im = (a_re + i*a_im)*(b_re + i*b_im)
552// r_re and r_im are placed respectively in bits 31:0 and 63:32 of the resulting
553// vector register.
554inline __m128 complexMul(const Complex<float>* const a, const Complex<float>* const b) {
555 const __m128 z = _mm_setzero_ps();
556 const __m128 neg_elem0 = _mm_set_ps(z: 0.0f,y: 0.0f,x: 0.0f,w: -0.0f);
557 // v_a[31:0] is a->re and v_a[63:32] is a->im.
558 const __m128 v_a = _mm_loadl_pi(a: z, p: (const __m64*)a);
559 const __m128 v_b = _mm_loadl_pi(a: z, p: (const __m64*)b);
560 // x_1 = v[nx] * wave[dw].
561 const __m128 v_a_riri = _mm_shuffle_ps(v_a, v_a, _MM_SHUFFLE(0, 1, 0, 1));
562 const __m128 v_b_irri = _mm_shuffle_ps(v_b, v_b, _MM_SHUFFLE(1, 0, 0, 1));
563 const __m128 mul = _mm_mul_ps(a: v_a_riri, b: v_b_irri);
564 const __m128 xored = _mm_xor_ps(a: mul, b: neg_elem0);
565 return _mm_hadd_ps(a: xored, b: z);
566}
567
568// optimized radix-2 transform
569template<> struct DFT_VecR2<float> {
570 void operator()(Complex<float>* dst, const int c_n, const int n, const int dw0, const Complex<float>* wave) const {
571 const __m128 z = _mm_setzero_ps();
572 const int nx = n/2;
573 for(int i = 0 ; i < c_n; i += n)
574 {
575 {
576 Complex<float>* v = dst + i;
577 float r0 = v[0].re + v[nx].re;
578 float i0 = v[0].im + v[nx].im;
579 float r1 = v[0].re - v[nx].re;
580 float i1 = v[0].im - v[nx].im;
581 v[0].re = r0; v[0].im = i0;
582 v[nx].re = r1; v[nx].im = i1;
583 }
584
585 for( int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
586 {
587 Complex<float>* v = dst + i + j;
588 const __m128 x_1 = complexMul(a: &v[nx], b: &wave[dw]);
589 const __m128 v_0 = _mm_loadl_pi(a: z, p: (const __m64*)&v[0]);
590 _mm_storel_pi(p: (__m64*)&v[0], a: _mm_add_ps(a: v_0, b: x_1));
591 _mm_storel_pi(p: (__m64*)&v[nx], a: _mm_sub_ps(a: v_0, b: x_1));
592 }
593 }
594 }
595};
596
597// Optimized radix-3 implementation.
598template<> struct DFT_VecR3<float> {
599 void operator()(Complex<float>* dst, const int c_n, const int n, const int dw0, const Complex<float>* wave) const {
600 const int nx = n / 3;
601 const __m128 z = _mm_setzero_ps();
602 const __m128 neg_elem1 = _mm_set_ps(z: 0.0f,y: 0.0f,x: -0.0f,w: 0.0f);
603 const __m128 sin_120 = _mm_set1_ps(w: Constants<float>::sin_120);
604 const __m128 one_half = _mm_set1_ps(w: 0.5f);
605 for(int i = 0; i < c_n; i += n )
606 {
607 {
608 Complex<float>* v = dst + i;
609
610 float r1 = v[nx].re + v[nx*2].re;
611 float i1 = v[nx].im + v[nx*2].im;
612 float r0 = v[0].re;
613 float i0 = v[0].im;
614 float r2 = Constants<float>::sin_120*(v[nx].im - v[nx*2].im);
615 float i2 = Constants<float>::sin_120*(v[nx*2].re - v[nx].re);
616 v[0].re = r0 + r1; v[0].im = i0 + i1;
617 r0 -= (float)0.5*r1; i0 -= (float)0.5*i1;
618 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
619 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
620 }
621
622 for(int j = 1, dw = dw0; j < nx; j++, dw += dw0 )
623 {
624 Complex<float>* v = dst + i + j;
625 const __m128 x_0 = complexMul(a: &v[nx], b: &wave[dw]);
626 const __m128 x_2 = complexMul(a: &v[nx*2], b: &wave[dw*2]);
627 const __m128 x_1 = _mm_add_ps(a: x_0, b: x_2);
628
629 const __m128 v_0 = _mm_loadl_pi(a: z, p: (const __m64*)&v[0]);
630 _mm_storel_pi(p: (__m64*)&v[0], a: _mm_add_ps(a: v_0, b: x_1));
631
632 const __m128 x_3 = _mm_mul_ps(a: sin_120, b: _mm_xor_ps(a: _mm_sub_ps(a: x_2, b: x_0), b: neg_elem1));
633 const __m128 x_3s = _mm_shuffle_ps(x_3, x_3, _MM_SHUFFLE(0, 1, 0, 1));
634 const __m128 x_4 = _mm_sub_ps(a: v_0, b: _mm_mul_ps(a: one_half, b: x_1));
635 _mm_storel_pi(p: (__m64*)&v[nx], a: _mm_add_ps(a: x_4, b: x_3s));
636 _mm_storel_pi(p: (__m64*)&v[nx*2], a: _mm_sub_ps(a: x_4, b: x_3s));
637 }
638 }
639 }
640};
641
642// optimized radix-4 transform
643template<> struct DFT_VecR4<float>
644{
645 int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const
646 {
647 int n = 1, i, j, nx, dw, dw0 = _dw0;
648 __m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1;
649 Cv32suf t; t.i = 0x80000000;
650 __m128 neg0_mask = _mm_load_ss(p: &t.f);
651 __m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(0,1,2,3));
652
653 for( ; n*4 <= N; )
654 {
655 nx = n;
656 n *= 4;
657 dw0 /= 4;
658
659 for( i = 0; i < n0; i += n )
660 {
661 Complexf *v0, *v1;
662
663 v0 = dst + i;
664 v1 = v0 + nx*2;
665
666 x02 = _mm_loadl_pi(a: x02, p: (const __m64*)&v0[0]);
667 x13 = _mm_loadl_pi(a: x13, p: (const __m64*)&v0[nx]);
668 x02 = _mm_loadh_pi(a: x02, p: (const __m64*)&v1[0]);
669 x13 = _mm_loadh_pi(a: x13, p: (const __m64*)&v1[nx]);
670
671 y01 = _mm_add_ps(a: x02, b: x13);
672 y23 = _mm_sub_ps(a: x02, b: x13);
673 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), b: neg3_mask);
674 t0 = _mm_movelh_ps(a: y01, b: y23);
675 y01 = _mm_add_ps(a: t0, b: t1);
676 y23 = _mm_sub_ps(a: t0, b: t1);
677
678 _mm_storel_pi(p: (__m64*)&v0[0], a: y01);
679 _mm_storeh_pi(p: (__m64*)&v0[nx], a: y01);
680 _mm_storel_pi(p: (__m64*)&v1[0], a: y23);
681 _mm_storeh_pi(p: (__m64*)&v1[nx], a: y23);
682
683 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
684 {
685 v0 = dst + i + j;
686 v1 = v0 + nx*2;
687
688 x13 = _mm_loadl_pi(a: x13, p: (const __m64*)&v0[nx]);
689 w23 = _mm_loadl_pi(a: w23, p: (const __m64*)&wave[dw*2]);
690 x13 = _mm_loadh_pi(a: x13, p: (const __m64*)&v1[nx]); // x1, x3 = r1 i1 r3 i3
691 w23 = _mm_loadh_pi(a: w23, p: (const __m64*)&wave[dw*3]); // w2, w3 = wr2 wi2 wr3 wi3
692
693 t0 = _mm_mul_ps(a: _mm_moveldup_ps(a: x13), b: w23);
694 t1 = _mm_mul_ps(a: _mm_movehdup_ps(a: x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(2,3,0,1)));
695 x13 = _mm_addsub_ps(a: t0, b: t1);
696 // re(x1*w2), im(x1*w2), re(x3*w3), im(x3*w3)
697 x02 = _mm_loadl_pi(a: x02, p: (const __m64*)&v1[0]); // x2 = r2 i2
698 w01 = _mm_loadl_pi(a: w01, p: (const __m64*)&wave[dw]); // w1 = wr1 wi1
699 x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(0,0,1,1));
700 w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(1,0,0,1));
701 x02 = _mm_mul_ps(a: x02, b: w01);
702 x02 = _mm_addsub_ps(a: x02, b: _mm_movelh_ps(a: x02, b: x02));
703 // re(x0) im(x0) re(x2*w1), im(x2*w1)
704 x02 = _mm_loadl_pi(a: x02, p: (const __m64*)&v0[0]);
705
706 y01 = _mm_add_ps(a: x02, b: x13);
707 y23 = _mm_sub_ps(a: x02, b: x13);
708 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), b: neg3_mask);
709 t0 = _mm_movelh_ps(a: y01, b: y23);
710 y01 = _mm_add_ps(a: t0, b: t1);
711 y23 = _mm_sub_ps(a: t0, b: t1);
712
713 _mm_storel_pi(p: (__m64*)&v0[0], a: y01);
714 _mm_storeh_pi(p: (__m64*)&v0[nx], a: y01);
715 _mm_storel_pi(p: (__m64*)&v1[0], a: y23);
716 _mm_storeh_pi(p: (__m64*)&v1[nx], a: y23);
717 }
718 }
719 }
720
721 _dw0 = dw0;
722 return n;
723 }
724};
725
726#endif
727
728#ifdef USE_IPP_DFT
729static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
730 const void* spec, uchar* buf)
731{
732 return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_32fc, (const Ipp32fc*)src, (Ipp32fc*)dst,
733 (const IppsDFTSpec_C_32fc*)spec, buf);
734}
735
736static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst,
737 const void* spec, uchar* buf)
738{
739 return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_64fc, (const Ipp64fc*)src, (Ipp64fc*)dst,
740 (const IppsDFTSpec_C_64fc*)spec, buf);
741}
742
743static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst,
744 const void* spec, uchar* buf)
745{
746 return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_32fc, (const Ipp32fc*)src, (Ipp32fc*)dst,
747 (const IppsDFTSpec_C_32fc*)spec, buf);
748}
749
750static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst,
751 const void* spec, uchar* buf)
752{
753 return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_64fc, (const Ipp64fc*)src, (Ipp64fc*)dst,
754 (const IppsDFTSpec_C_64fc*)spec, buf);
755}
756
757static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst,
758 const void* spec, uchar* buf)
759{
760 return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
761}
762
763static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst,
764 const void* spec, uchar* buf)
765{
766 return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
767}
768
769static IppStatus ippsDFTInv_PackToR( const float* src, float* dst,
770 const void* spec, uchar* buf)
771{
772 return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
773}
774
775static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
776 const void* spec, uchar* buf)
777{
778 return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
779}
780#endif
781
782struct OcvDftOptions;
783
784typedef void (*DFTFunc)(const OcvDftOptions & c, const void* src, void* dst);
785
786struct OcvDftOptions {
787 int nf;
788 int *factors;
789 double scale;
790
791 int* itab;
792 void* wave;
793 int tab_size;
794 int n;
795
796 bool isInverse;
797 bool noPermute;
798 bool isComplex;
799
800 bool haveSSE3;
801
802 DFTFunc dft_func;
803 bool useIpp;
804
805#ifdef USE_IPP_DFT
806 uchar* ipp_spec;
807 uchar* ipp_work;
808#endif
809
810 OcvDftOptions()
811 {
812 nf = 0;
813 factors = 0;
814 scale = 0;
815 itab = 0;
816 wave = 0;
817 tab_size = 0;
818 n = 0;
819 isInverse = false;
820 noPermute = false;
821 isComplex = false;
822 useIpp = false;
823#ifdef USE_IPP_DFT
824 ipp_spec = 0;
825 ipp_work = 0;
826#endif
827 dft_func = 0;
828 haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
829 }
830};
831
832// mixed-radix complex discrete Fourier transform: double-precision version
833template<typename T> static void
834DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
835{
836 const Complex<T>* wave = (Complex<T>*)c.wave;
837 const int * itab = c.itab;
838
839 int n = c.n;
840 int f_idx, nx;
841 int inv = c.isInverse;
842 int dw0 = c.tab_size, dw;
843 int i, j, k;
844 Complex<T> t;
845 T scale = (T)c.scale;
846
847 if(typeid(T) == typeid(float))
848 {
849 CALL_HAL(dft, cv_hal_dft, reinterpret_cast<const uchar*>(src), reinterpret_cast<uchar*>(dst), CV_32F,
850 c.nf, c.factors, c.scale, c.itab, c.wave, c.tab_size, c.n, c.isInverse, c.noPermute);
851 }
852 if(typeid(T) == typeid(double))
853 {
854 CALL_HAL(dft, cv_hal_dft, reinterpret_cast<const uchar*>(src), reinterpret_cast<uchar*>(dst), CV_64F,
855 c.nf, c.factors, c.scale, c.itab, c.wave, c.tab_size, c.n, c.isInverse, c.noPermute);
856 }
857
858 if( c.useIpp )
859 {
860#ifdef USE_IPP_DFT
861 if( !inv )
862 {
863 if (ippsDFTFwd_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
864 {
865 CV_IMPL_ADD(CV_IMPL_IPP);
866 return;
867 }
868 }
869 else
870 {
871 if (ippsDFTInv_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
872 {
873 CV_IMPL_ADD(CV_IMPL_IPP);
874 return;
875 }
876 }
877 setIppErrorStatus();
878#endif
879 }
880
881 int tab_step = c.tab_size == n ? 1 : c.tab_size == n*2 ? 2 : c.tab_size/n;
882
883 // 0. shuffle data
884 if( dst != src )
885 {
886 CV_Assert( !c.noPermute );
887 if( !inv )
888 {
889 for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
890 {
891 int k0 = itab[0], k1 = itab[tab_step];
892 CV_Assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
893 dst[i] = src[k0]; dst[i+1] = src[k1];
894 }
895
896 if( i < n )
897 dst[n-1] = src[n-1];
898 }
899 else
900 {
901 for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
902 {
903 int k0 = itab[0], k1 = itab[tab_step];
904 CV_Assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
905 t.re = src[k0].re; t.im = -src[k0].im;
906 dst[i] = t;
907 t.re = src[k1].re; t.im = -src[k1].im;
908 dst[i+1] = t;
909 }
910
911 if( i < n )
912 {
913 t.re = src[n-1].re; t.im = -src[n-1].im;
914 dst[i] = t;
915 }
916 }
917 }
918 else
919 {
920 if( !c.noPermute )
921 {
922 CV_Assert( c.factors[0] == c.factors[c.nf-1] );
923 if( c.nf == 1 )
924 {
925 if( (n & 3) == 0 )
926 {
927 int n2 = n/2;
928 Complex<T>* dsth = dst + n2;
929
930 for( i = 0; i < n2; i += 2, itab += tab_step*2 )
931 {
932 j = itab[0];
933 CV_Assert( (unsigned)j < (unsigned)n2 );
934
935 CV_SWAP(dst[i+1], dsth[j], t);
936 if( j > i )
937 {
938 CV_SWAP(dst[i], dst[j], t);
939 CV_SWAP(dsth[i+1], dsth[j+1], t);
940 }
941 }
942 }
943 // else do nothing
944 }
945 else
946 {
947 for( i = 0; i < n; i++, itab += tab_step )
948 {
949 j = itab[0];
950 CV_Assert( (unsigned)j < (unsigned)n );
951 if( j > i )
952 CV_SWAP(dst[i], dst[j], t);
953 }
954 }
955 }
956
957 if( inv )
958 {
959 for( i = 0; i <= n - 2; i += 2 )
960 {
961 T t0 = -dst[i].im;
962 T t1 = -dst[i+1].im;
963 dst[i].im = t0; dst[i+1].im = t1;
964 }
965
966 if( i < n )
967 dst[n-1].im = -dst[n-1].im;
968 }
969 }
970
971 n = 1;
972 // 1. power-2 transforms
973 if( (c.factors[0] & 1) == 0 )
974 {
975 if( c.factors[0] >= 4 && c.haveSSE3)
976 {
977 DFT_VecR4<T> vr4;
978 n = vr4(dst, c.factors[0], c.n, dw0, wave);
979 }
980
981 // radix-4 transform
982 for( ; n*4 <= c.factors[0]; )
983 {
984 nx = n;
985 n *= 4;
986 dw0 /= 4;
987
988 for( i = 0; i < c.n; i += n )
989 {
990 Complex<T> *v0, *v1;
991 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
992
993 v0 = dst + i;
994 v1 = v0 + nx*2;
995
996 r0 = v1[0].re; i0 = v1[0].im;
997 r4 = v1[nx].re; i4 = v1[nx].im;
998
999 r1 = r0 + r4; i1 = i0 + i4;
1000 r3 = i0 - i4; i3 = r4 - r0;
1001
1002 r2 = v0[0].re; i2 = v0[0].im;
1003 r4 = v0[nx].re; i4 = v0[nx].im;
1004
1005 r0 = r2 + r4; i0 = i2 + i4;
1006 r2 -= r4; i2 -= i4;
1007
1008 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
1009 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
1010 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
1011 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
1012
1013 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
1014 {
1015 v0 = dst + i + j;
1016 v1 = v0 + nx*2;
1017
1018 r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
1019 i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
1020 r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
1021 i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
1022 r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
1023 i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
1024
1025 r1 = i0 + i3; i1 = r0 + r3;
1026 r3 = r0 - r3; i3 = i3 - i0;
1027 r4 = v0[0].re; i4 = v0[0].im;
1028
1029 r0 = r4 + r2; i0 = i4 + i2;
1030 r2 = r4 - r2; i2 = i4 - i2;
1031
1032 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
1033 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
1034 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
1035 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
1036 }
1037 }
1038 }
1039
1040 for( ; n < c.factors[0]; )
1041 {
1042 // do the remaining radix-2 transform
1043 n *= 2;
1044 dw0 /= 2;
1045
1046 if(c.haveSSE3)
1047 {
1048 DFT_VecR2<T> vr2;
1049 vr2(dst, c.n, n, dw0, wave);
1050 }
1051 else
1052 {
1053 DFT_R2<T> vr2;
1054 vr2(dst, c.n, n, dw0, wave);
1055 }
1056 }
1057 }
1058
1059 // 2. all the other transforms
1060 for( f_idx = (c.factors[0]&1) ? 0 : 1; f_idx < c.nf; f_idx++ )
1061 {
1062 int factor = c.factors[f_idx];
1063 nx = n;
1064 n *= factor;
1065 dw0 /= factor;
1066
1067 if( factor == 3 )
1068 {
1069 if(c.haveSSE3)
1070 {
1071 DFT_VecR3<T> vr3;
1072 vr3(dst, c.n, n, dw0, wave);
1073 }
1074 else
1075 {
1076 DFT_R3<T> vr3;
1077 vr3(dst, c.n, n, dw0, wave);
1078 }
1079 }
1080 else if( factor == 5 )
1081 {
1082 DFT_R5<T> vr5;
1083 vr5(dst, c.n, n, dw0, wave);
1084 }
1085 else
1086 {
1087 // radix-"factor" - an odd number
1088 int p, q, factor2 = (factor - 1)/2;
1089 int d, dd, dw_f = c.tab_size/factor;
1090 AutoBuffer<Complex<T> > buf(factor2 * 2);
1091 Complex<T>* a = buf.data();
1092 Complex<T>* b = a + factor2;
1093
1094 for( i = 0; i < c.n; i += n )
1095 {
1096 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
1097 {
1098 Complex<T>* v = dst + i + j;
1099 Complex<T> v_0 = v[0];
1100 Complex<T> vn_0 = v_0;
1101
1102 if( j == 0 )
1103 {
1104 for( p = 1, k = nx; p <= factor2; p++, k += nx )
1105 {
1106 T r0 = v[k].re + v[n-k].re;
1107 T i0 = v[k].im - v[n-k].im;
1108 T r1 = v[k].re - v[n-k].re;
1109 T i1 = v[k].im + v[n-k].im;
1110
1111 vn_0.re += r0; vn_0.im += i1;
1112 a[p-1].re = r0; a[p-1].im = i0;
1113 b[p-1].re = r1; b[p-1].im = i1;
1114 }
1115 }
1116 else
1117 {
1118 const Complex<T>* wave_ = wave + dw*factor;
1119 d = dw;
1120
1121 for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
1122 {
1123 T r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
1124 T i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
1125
1126 T r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
1127 T i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
1128
1129 T r0 = r2 + r1;
1130 T i0 = i2 - i1;
1131 r1 = r2 - r1;
1132 i1 = i2 + i1;
1133
1134 vn_0.re += r0; vn_0.im += i1;
1135 a[p-1].re = r0; a[p-1].im = i0;
1136 b[p-1].re = r1; b[p-1].im = i1;
1137 }
1138 }
1139
1140 v[0] = vn_0;
1141
1142 for( p = 1, k = nx; p <= factor2; p++, k += nx )
1143 {
1144 Complex<T> s0 = v_0, s1 = v_0;
1145 d = dd = dw_f*p;
1146
1147 for( q = 0; q < factor2; q++ )
1148 {
1149 T r0 = wave[d].re * a[q].re;
1150 T i0 = wave[d].im * a[q].im;
1151 T r1 = wave[d].re * b[q].im;
1152 T i1 = wave[d].im * b[q].re;
1153
1154 s1.re += r0 + i0; s0.re += r0 - i0;
1155 s1.im += r1 - i1; s0.im += r1 + i1;
1156
1157 d += dd;
1158 d -= -(d >= c.tab_size) & c.tab_size;
1159 }
1160
1161 v[k] = s0;
1162 v[n-k] = s1;
1163 }
1164 }
1165 }
1166 }
1167 }
1168
1169 if( scale != 1 )
1170 {
1171 T re_scale = scale, im_scale = scale;
1172 if( inv )
1173 im_scale = -im_scale;
1174
1175 for( i = 0; i < c.n; i++ )
1176 {
1177 T t0 = dst[i].re*re_scale;
1178 T t1 = dst[i].im*im_scale;
1179 dst[i].re = t0;
1180 dst[i].im = t1;
1181 }
1182 }
1183 else if( inv )
1184 {
1185 for( i = 0; i <= c.n - 2; i += 2 )
1186 {
1187 T t0 = -dst[i].im;
1188 T t1 = -dst[i+1].im;
1189 dst[i].im = t0;
1190 dst[i+1].im = t1;
1191 }
1192
1193 if( i < c.n )
1194 dst[c.n-1].im = -dst[c.n-1].im;
1195 }
1196}
1197
1198
1199/* FFT of real vector
1200 output vector format:
1201 re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
1202 re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1203template<typename T> static void
1204RealDFT(const OcvDftOptions & c, const T* src, T* dst)
1205{
1206 int n = c.n;
1207 int complex_output = c.isComplex;
1208 T scale = (T)c.scale;
1209 int j;
1210 dst += complex_output;
1211
1212 if( c.useIpp )
1213 {
1214#ifdef USE_IPP_DFT
1215 if (ippsDFTFwd_RToPack( src, dst, c.ipp_spec, c.ipp_work ) >=0)
1216 {
1217 if( complex_output )
1218 {
1219 dst[-1] = dst[0];
1220 dst[0] = 0;
1221 if( (n & 1) == 0 )
1222 dst[n] = 0;
1223 }
1224 CV_IMPL_ADD(CV_IMPL_IPP);
1225 return;
1226 }
1227 setIppErrorStatus();
1228#endif
1229 }
1230 CV_Assert( c.tab_size == n );
1231
1232 if( n == 1 )
1233 {
1234 dst[0] = src[0]*scale;
1235 }
1236 else if( n == 2 )
1237 {
1238 T t = (src[0] + src[1])*scale;
1239 dst[1] = (src[0] - src[1])*scale;
1240 dst[0] = t;
1241 }
1242 else if( n & 1 )
1243 {
1244 dst -= complex_output;
1245 Complex<T>* _dst = (Complex<T>*)dst;
1246 _dst[0].re = src[0]*scale;
1247 _dst[0].im = 0;
1248 for( j = 1; j < n; j += 2 )
1249 {
1250 T t0 = src[c.itab[j]]*scale;
1251 T t1 = src[c.itab[j+1]]*scale;
1252 _dst[j].re = t0;
1253 _dst[j].im = 0;
1254 _dst[j+1].re = t1;
1255 _dst[j+1].im = 0;
1256 }
1257 OcvDftOptions sub_c = c;
1258 sub_c.isComplex = false;
1259 sub_c.isInverse = false;
1260 sub_c.noPermute = true;
1261 sub_c.scale = 1.;
1262 DFT(sub_c, _dst, _dst);
1263 if( !complex_output )
1264 dst[1] = dst[0];
1265 }
1266 else
1267 {
1268 T t0, t;
1269 T h1_re, h1_im, h2_re, h2_im;
1270 T scale2 = scale*(T)0.5;
1271 int n2 = n >> 1;
1272
1273 c.factors[0] >>= 1;
1274
1275 OcvDftOptions sub_c = c;
1276 sub_c.factors += (c.factors[0] == 1);
1277 sub_c.nf -= (c.factors[0] == 1);
1278 sub_c.isComplex = false;
1279 sub_c.isInverse = false;
1280 sub_c.noPermute = false;
1281 sub_c.scale = 1.;
1282 sub_c.n = n2;
1283
1284 DFT(sub_c, (Complex<T>*)src, (Complex<T>*)dst);
1285
1286 c.factors[0] <<= 1;
1287
1288 t = dst[0] - dst[1];
1289 dst[0] = (dst[0] + dst[1])*scale;
1290 dst[1] = t*scale;
1291
1292 t0 = dst[n2];
1293 t = dst[n-1];
1294 dst[n-1] = dst[1];
1295
1296 const Complex<T> *wave = (const Complex<T>*)c.wave;
1297
1298 for( j = 2, wave++; j < n2; j += 2, wave++ )
1299 {
1300 /* calc odd */
1301 h2_re = scale2*(dst[j+1] + t);
1302 h2_im = scale2*(dst[n-j] - dst[j]);
1303
1304 /* calc even */
1305 h1_re = scale2*(dst[j] + dst[n-j]);
1306 h1_im = scale2*(dst[j+1] - t);
1307
1308 /* rotate */
1309 t = h2_re*wave->re - h2_im*wave->im;
1310 h2_im = h2_re*wave->im + h2_im*wave->re;
1311 h2_re = t;
1312 t = dst[n-j-1];
1313
1314 dst[j-1] = h1_re + h2_re;
1315 dst[n-j-1] = h1_re - h2_re;
1316 dst[j] = h1_im + h2_im;
1317 dst[n-j] = h2_im - h1_im;
1318 }
1319
1320 if( j <= n2 )
1321 {
1322 dst[n2-1] = t0*scale;
1323 dst[n2] = -t*scale;
1324 }
1325 }
1326
1327 if( complex_output && ((n & 1) == 0 || n == 1))
1328 {
1329 dst[-1] = dst[0];
1330 dst[0] = 0;
1331 if( n > 1 )
1332 dst[n] = 0;
1333 }
1334}
1335
1336/* Inverse FFT of complex conjugate-symmetric vector
1337 input vector format:
1338 re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
1339 re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1340template<typename T> static void
1341CCSIDFT(const OcvDftOptions & c, const T* src, T* dst)
1342{
1343 int n = c.n;
1344 int complex_input = c.isComplex;
1345 int j, k;
1346 T scale = (T)c.scale;
1347 T save_s1 = 0.;
1348 T t0, t1, t2, t3, t;
1349
1350 CV_Assert( c.tab_size == n );
1351
1352 if( complex_input )
1353 {
1354 CV_Assert( src != dst );
1355 save_s1 = src[1];
1356 ((T*)src)[1] = src[0];
1357 src++;
1358 }
1359 if( c.useIpp )
1360 {
1361#ifdef USE_IPP_DFT
1362 if (ippsDFTInv_PackToR( src, dst, c.ipp_spec, c.ipp_work ) >=0)
1363 {
1364 if( complex_input )
1365 ((T*)src)[0] = (T)save_s1;
1366 CV_IMPL_ADD(CV_IMPL_IPP);
1367 return;
1368 }
1369
1370 setIppErrorStatus();
1371#endif
1372 }
1373 if( n == 1 )
1374 {
1375 dst[0] = (T)(src[0]*scale);
1376 }
1377 else if( n == 2 )
1378 {
1379 t = (src[0] + src[1])*scale;
1380 dst[1] = (src[0] - src[1])*scale;
1381 dst[0] = t;
1382 }
1383 else if( n & 1 )
1384 {
1385 Complex<T>* _src = (Complex<T>*)(src-1);
1386 Complex<T>* _dst = (Complex<T>*)dst;
1387
1388 _dst[0].re = src[0];
1389 _dst[0].im = 0;
1390
1391 int n2 = (n+1) >> 1;
1392
1393 for( j = 1; j < n2; j++ )
1394 {
1395 int k0 = c.itab[j], k1 = c.itab[n-j];
1396 t0 = _src[j].re; t1 = _src[j].im;
1397 _dst[k0].re = t0; _dst[k0].im = -t1;
1398 _dst[k1].re = t0; _dst[k1].im = t1;
1399 }
1400
1401 OcvDftOptions sub_c = c;
1402 sub_c.isComplex = false;
1403 sub_c.isInverse = false;
1404 sub_c.noPermute = true;
1405 sub_c.scale = 1.;
1406 sub_c.n = n;
1407
1408 DFT(sub_c, _dst, _dst);
1409 dst[0] *= scale;
1410 for( j = 1; j < n; j += 2 )
1411 {
1412 t0 = dst[j*2]*scale;
1413 t1 = dst[j*2+2]*scale;
1414 dst[j] = t0;
1415 dst[j+1] = t1;
1416 }
1417 }
1418 else
1419 {
1420 int inplace = src == dst;
1421 const Complex<T>* w = (const Complex<T>*)c.wave;
1422
1423 t = src[1];
1424 t0 = (src[0] + src[n-1]);
1425 t1 = (src[n-1] - src[0]);
1426 dst[0] = t0;
1427 dst[1] = t1;
1428
1429 int n2 = (n+1) >> 1;
1430
1431 for( j = 2, w++; j < n2; j += 2, w++ )
1432 {
1433 T h1_re, h1_im, h2_re, h2_im;
1434
1435 h1_re = (t + src[n-j-1]);
1436 h1_im = (src[j] - src[n-j]);
1437
1438 h2_re = (t - src[n-j-1]);
1439 h2_im = (src[j] + src[n-j]);
1440
1441 t = h2_re*w->re + h2_im*w->im;
1442 h2_im = h2_im*w->re - h2_re*w->im;
1443 h2_re = t;
1444
1445 t = src[j+1];
1446 t0 = h1_re - h2_im;
1447 t1 = -h1_im - h2_re;
1448 t2 = h1_re + h2_im;
1449 t3 = h1_im - h2_re;
1450
1451 if( inplace )
1452 {
1453 dst[j] = t0;
1454 dst[j+1] = t1;
1455 dst[n-j] = t2;
1456 dst[n-j+1]= t3;
1457 }
1458 else
1459 {
1460 int j2 = j >> 1;
1461 k = c.itab[j2];
1462 dst[k] = t0;
1463 dst[k+1] = t1;
1464 k = c.itab[n2-j2];
1465 dst[k] = t2;
1466 dst[k+1]= t3;
1467 }
1468 }
1469
1470 if( j <= n2 )
1471 {
1472 t0 = t*2;
1473 t1 = src[n2]*2;
1474
1475 if( inplace )
1476 {
1477 dst[n2] = t0;
1478 dst[n2+1] = t1;
1479 }
1480 else
1481 {
1482 k = c.itab[n2];
1483 dst[k*2] = t0;
1484 dst[k*2+1] = t1;
1485 }
1486 }
1487
1488 c.factors[0] >>= 1;
1489
1490 OcvDftOptions sub_c = c;
1491 sub_c.factors += (c.factors[0] == 1);
1492 sub_c.nf -= (c.factors[0] == 1);
1493 sub_c.isComplex = false;
1494 sub_c.isInverse = false;
1495 sub_c.noPermute = !inplace;
1496 sub_c.scale = 1.;
1497 sub_c.n = n2;
1498
1499 DFT(sub_c, (Complex<T>*)dst, (Complex<T>*)dst);
1500
1501 c.factors[0] <<= 1;
1502
1503 for( j = 0; j < n; j += 2 )
1504 {
1505 t0 = dst[j]*scale;
1506 t1 = dst[j+1]*(-scale);
1507 dst[j] = t0;
1508 dst[j+1] = t1;
1509 }
1510 }
1511 if( complex_input )
1512 ((T*)src)[0] = (T)save_s1;
1513}
1514
1515static void
1516CopyColumn( const uchar* _src, size_t src_step,
1517 uchar* _dst, size_t dst_step,
1518 int len, size_t elem_size )
1519{
1520 int i, t0, t1;
1521 const int* src = (const int*)_src;
1522 int* dst = (int*)_dst;
1523 src_step /= sizeof(src[0]);
1524 dst_step /= sizeof(dst[0]);
1525
1526 if( elem_size == sizeof(int) )
1527 {
1528 for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1529 dst[0] = src[0];
1530 }
1531 else if( elem_size == sizeof(int)*2 )
1532 {
1533 for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1534 {
1535 t0 = src[0]; t1 = src[1];
1536 dst[0] = t0; dst[1] = t1;
1537 }
1538 }
1539 else if( elem_size == sizeof(int)*4 )
1540 {
1541 for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1542 {
1543 t0 = src[0]; t1 = src[1];
1544 dst[0] = t0; dst[1] = t1;
1545 t0 = src[2]; t1 = src[3];
1546 dst[2] = t0; dst[3] = t1;
1547 }
1548 }
1549}
1550
1551
1552static void
1553CopyFrom2Columns( const uchar* _src, size_t src_step,
1554 uchar* _dst0, uchar* _dst1,
1555 int len, size_t elem_size )
1556{
1557 int i, t0, t1;
1558 const int* src = (const int*)_src;
1559 int* dst0 = (int*)_dst0;
1560 int* dst1 = (int*)_dst1;
1561 src_step /= sizeof(src[0]);
1562
1563 if( elem_size == sizeof(int) )
1564 {
1565 for( i = 0; i < len; i++, src += src_step )
1566 {
1567 t0 = src[0]; t1 = src[1];
1568 dst0[i] = t0; dst1[i] = t1;
1569 }
1570 }
1571 else if( elem_size == sizeof(int)*2 )
1572 {
1573 for( i = 0; i < len*2; i += 2, src += src_step )
1574 {
1575 t0 = src[0]; t1 = src[1];
1576 dst0[i] = t0; dst0[i+1] = t1;
1577 t0 = src[2]; t1 = src[3];
1578 dst1[i] = t0; dst1[i+1] = t1;
1579 }
1580 }
1581 else if( elem_size == sizeof(int)*4 )
1582 {
1583 for( i = 0; i < len*4; i += 4, src += src_step )
1584 {
1585 t0 = src[0]; t1 = src[1];
1586 dst0[i] = t0; dst0[i+1] = t1;
1587 t0 = src[2]; t1 = src[3];
1588 dst0[i+2] = t0; dst0[i+3] = t1;
1589 t0 = src[4]; t1 = src[5];
1590 dst1[i] = t0; dst1[i+1] = t1;
1591 t0 = src[6]; t1 = src[7];
1592 dst1[i+2] = t0; dst1[i+3] = t1;
1593 }
1594 }
1595}
1596
1597
1598static void
1599CopyTo2Columns( const uchar* _src0, const uchar* _src1,
1600 uchar* _dst, size_t dst_step,
1601 int len, size_t elem_size )
1602{
1603 int i, t0, t1;
1604 const int* src0 = (const int*)_src0;
1605 const int* src1 = (const int*)_src1;
1606 int* dst = (int*)_dst;
1607 dst_step /= sizeof(dst[0]);
1608
1609 if( elem_size == sizeof(int) )
1610 {
1611 for( i = 0; i < len; i++, dst += dst_step )
1612 {
1613 t0 = src0[i]; t1 = src1[i];
1614 dst[0] = t0; dst[1] = t1;
1615 }
1616 }
1617 else if( elem_size == sizeof(int)*2 )
1618 {
1619 for( i = 0; i < len*2; i += 2, dst += dst_step )
1620 {
1621 t0 = src0[i]; t1 = src0[i+1];
1622 dst[0] = t0; dst[1] = t1;
1623 t0 = src1[i]; t1 = src1[i+1];
1624 dst[2] = t0; dst[3] = t1;
1625 }
1626 }
1627 else if( elem_size == sizeof(int)*4 )
1628 {
1629 for( i = 0; i < len*4; i += 4, dst += dst_step )
1630 {
1631 t0 = src0[i]; t1 = src0[i+1];
1632 dst[0] = t0; dst[1] = t1;
1633 t0 = src0[i+2]; t1 = src0[i+3];
1634 dst[2] = t0; dst[3] = t1;
1635 t0 = src1[i]; t1 = src1[i+1];
1636 dst[4] = t0; dst[5] = t1;
1637 t0 = src1[i+2]; t1 = src1[i+3];
1638 dst[6] = t0; dst[7] = t1;
1639 }
1640 }
1641}
1642
1643
1644static void
1645ExpandCCS( uchar* _ptr, int n, int elem_size )
1646{
1647 int i;
1648 if( elem_size == (int)sizeof(float) )
1649 {
1650 float* p = (float*)_ptr;
1651 for( i = 1; i < (n+1)/2; i++ )
1652 {
1653 p[(n-i)*2] = p[i*2-1];
1654 p[(n-i)*2+1] = -p[i*2];
1655 }
1656 if( (n & 1) == 0 )
1657 {
1658 p[n] = p[n-1];
1659 p[n+1] = 0.f;
1660 n--;
1661 }
1662 for( i = n-1; i > 0; i-- )
1663 p[i+1] = p[i];
1664 p[1] = 0.f;
1665 }
1666 else
1667 {
1668 double* p = (double*)_ptr;
1669 for( i = 1; i < (n+1)/2; i++ )
1670 {
1671 p[(n-i)*2] = p[i*2-1];
1672 p[(n-i)*2+1] = -p[i*2];
1673 }
1674 if( (n & 1) == 0 )
1675 {
1676 p[n] = p[n-1];
1677 p[n+1] = 0.f;
1678 n--;
1679 }
1680 for( i = n-1; i > 0; i-- )
1681 p[i+1] = p[i];
1682 p[1] = 0.f;
1683 }
1684}
1685
1686static void DFT_32f(const OcvDftOptions & c, const Complexf* src, Complexf* dst)
1687{
1688 DFT(c, src, dst);
1689}
1690
1691static void DFT_64f(const OcvDftOptions & c, const Complexd* src, Complexd* dst)
1692{
1693 DFT(c, src, dst);
1694}
1695
1696
1697static void RealDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1698{
1699 RealDFT(c, src, dst);
1700}
1701
1702static void RealDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1703{
1704 RealDFT(c, src, dst);
1705}
1706
1707static void CCSIDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1708{
1709 CCSIDFT(c, src, dst);
1710}
1711
1712static void CCSIDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1713{
1714 CCSIDFT(c, src, dst);
1715}
1716
1717}
1718
1719#ifdef USE_IPP_DFT
1720typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
1721typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
1722#endif
1723
1724namespace cv
1725{
1726#if defined USE_IPP_DFT
1727
1728typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc*, int, Ipp32fc*, int, const IppiDFTSpec_C_32fc*, Ipp8u*);
1729typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*);
1730
1731template <typename Dft>
1732class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
1733{
1734public:
1735
1736 Dft_C_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1737 const Dft& _ippidft, int _norm_flag, bool *_ok) :
1738 ParallelLoopBody(),
1739 src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1740 ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1741 {
1742 *ok = true;
1743 }
1744
1745 virtual void operator()(const Range& range) const CV_OVERRIDE
1746 {
1747 IppStatus status;
1748 Ipp8u* pBuffer = 0;
1749 Ipp8u* pMemInit= 0;
1750 int sizeBuffer=0;
1751 int sizeSpec=0;
1752 int sizeInit=0;
1753
1754 IppiSize srcRoiSize = {.width: width, .height: 1};
1755
1756 status = ippiDFTGetSize_C_32fc(roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pSizeSpec: &sizeSpec, pSizeInit: &sizeInit, pSizeBuf: &sizeBuffer );
1757 if ( status < 0 )
1758 {
1759 *ok = false;
1760 return;
1761 }
1762
1763 IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1764
1765 if ( sizeInit > 0 )
1766 pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1767
1768 if ( sizeBuffer > 0 )
1769 pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1770
1771 status = ippiDFTInit_C_32fc( roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pDFTSpec, pMemInit );
1772
1773 if ( sizeInit > 0 )
1774 ippFree( ptr: pMemInit );
1775
1776 if ( status < 0 )
1777 {
1778 ippFree( ptr: pDFTSpec );
1779 if ( sizeBuffer > 0 )
1780 ippFree( ptr: pBuffer );
1781 *ok = false;
1782 return;
1783 }
1784
1785 for( int i = range.start; i < range.end; ++i)
1786 if(!ippidft((Ipp32fc*)(src + src_step * i), src_step, (Ipp32fc*)(dst + dst_step * i), dst_step,
1787 pDFTSpec, (Ipp8u*)pBuffer))
1788 {
1789 *ok = false;
1790 }
1791
1792 if ( sizeBuffer > 0 )
1793 ippFree( ptr: pBuffer );
1794
1795 ippFree( ptr: pDFTSpec );
1796 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1797 }
1798
1799private:
1800 const uchar * src;
1801 size_t src_step;
1802 uchar * dst;
1803 size_t dst_step;
1804 int width;
1805 const Dft& ippidft;
1806 int norm_flag;
1807 bool *ok;
1808
1809 const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&);
1810};
1811
1812template <typename Dft>
1813class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
1814{
1815public:
1816
1817 Dft_R_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1818 const Dft& _ippidft, int _norm_flag, bool *_ok) :
1819 ParallelLoopBody(),
1820 src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1821 ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1822 {
1823 *ok = true;
1824 }
1825
1826 virtual void operator()(const Range& range) const CV_OVERRIDE
1827 {
1828 IppStatus status;
1829 Ipp8u* pBuffer = 0;
1830 Ipp8u* pMemInit= 0;
1831 int sizeBuffer=0;
1832 int sizeSpec=0;
1833 int sizeInit=0;
1834
1835 IppiSize srcRoiSize = {.width: width, .height: 1};
1836
1837 status = ippiDFTGetSize_R_32f(roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pSizeSpec: &sizeSpec, pSizeInit: &sizeInit, pSizeBuf: &sizeBuffer );
1838 if ( status < 0 )
1839 {
1840 *ok = false;
1841 return;
1842 }
1843
1844 IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
1845
1846 if ( sizeInit > 0 )
1847 pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1848
1849 if ( sizeBuffer > 0 )
1850 pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1851
1852 status = ippiDFTInit_R_32f( roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pDFTSpec, pMemInit );
1853
1854 if ( sizeInit > 0 )
1855 ippFree( ptr: pMemInit );
1856
1857 if ( status < 0 )
1858 {
1859 ippFree( ptr: pDFTSpec );
1860 if ( sizeBuffer > 0 )
1861 ippFree( ptr: pBuffer );
1862 *ok = false;
1863 return;
1864 }
1865
1866 for( int i = range.start; i < range.end; ++i)
1867 if(!ippidft((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step,
1868 pDFTSpec, (Ipp8u*)pBuffer))
1869 {
1870 *ok = false;
1871 }
1872
1873 if ( sizeBuffer > 0 )
1874 ippFree( ptr: pBuffer );
1875
1876 ippFree( ptr: pDFTSpec );
1877 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1878 }
1879
1880private:
1881 const uchar * src;
1882 size_t src_step;
1883 uchar * dst;
1884 size_t dst_step;
1885 int width;
1886 const Dft& ippidft;
1887 int norm_flag;
1888 bool *ok;
1889
1890 const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&);
1891};
1892
1893template <typename Dft>
1894bool Dft_C_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1895{
1896 bool ok;
1897 parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
1898 return ok;
1899}
1900
1901template <typename Dft>
1902bool Dft_R_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1903{
1904 bool ok;
1905 parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
1906 return ok;
1907}
1908
1909struct IPPDFT_C_Functor
1910{
1911 IPPDFT_C_Functor(ippiDFT_C_Func _func) : ippiDFT_CToC_32fc_C1R(_func){}
1912
1913 bool operator()(const Ipp32fc* src, size_t srcStep, Ipp32fc* dst, size_t dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const
1914 {
1915 return ippiDFT_CToC_32fc_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_CToC_32fc_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= 0 : false;
1916 }
1917private:
1918 ippiDFT_C_Func ippiDFT_CToC_32fc_C1R;
1919};
1920
1921struct IPPDFT_R_Functor
1922{
1923 IPPDFT_R_Functor(ippiDFT_R_Func _func) : ippiDFT_PackToR_32f_C1R(_func){}
1924
1925 bool operator()(const Ipp32f* src, size_t srcStep, Ipp32f* dst, size_t dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const
1926 {
1927 return ippiDFT_PackToR_32f_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_PackToR_32f_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= 0 : false;
1928 }
1929private:
1930 ippiDFT_R_Func ippiDFT_PackToR_32f_C1R;
1931};
1932
1933static bool ippi_DFT_C_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1934{
1935 CV_INSTRUMENT_REGION_IPP();
1936
1937 IppStatus status;
1938 Ipp8u* pBuffer = 0;
1939 Ipp8u* pMemInit= 0;
1940 int sizeBuffer=0;
1941 int sizeSpec=0;
1942 int sizeInit=0;
1943
1944 IppiSize srcRoiSize = {.width: width, .height: height};
1945
1946 status = ippiDFTGetSize_C_32fc(roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pSizeSpec: &sizeSpec, pSizeInit: &sizeInit, pSizeBuf: &sizeBuffer );
1947 if ( status < 0 )
1948 return false;
1949
1950 IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1951
1952 if ( sizeInit > 0 )
1953 pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1954
1955 if ( sizeBuffer > 0 )
1956 pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1957
1958 status = ippiDFTInit_C_32fc( roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pDFTSpec, pMemInit );
1959
1960 if ( sizeInit > 0 )
1961 ippFree( ptr: pMemInit );
1962
1963 if ( status < 0 )
1964 {
1965 ippFree( ptr: pDFTSpec );
1966 if ( sizeBuffer > 0 )
1967 ippFree( ptr: pBuffer );
1968 return false;
1969 }
1970
1971 if (!inv)
1972 status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_CToC_32fc_C1R, (Ipp32fc*)src, static_cast<int>(src_step), (Ipp32fc*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1973 else
1974 status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_CToC_32fc_C1R, (Ipp32fc*)src, static_cast<int>(src_step), (Ipp32fc*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1975
1976 if ( sizeBuffer > 0 )
1977 ippFree( ptr: pBuffer );
1978
1979 ippFree( ptr: pDFTSpec );
1980
1981 if(status >= 0)
1982 {
1983 CV_IMPL_ADD(CV_IMPL_IPP);
1984 return true;
1985 }
1986 return false;
1987}
1988
1989static bool ippi_DFT_R_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1990{
1991 CV_INSTRUMENT_REGION_IPP();
1992
1993 IppStatus status;
1994 Ipp8u* pBuffer = 0;
1995 Ipp8u* pMemInit= 0;
1996 int sizeBuffer=0;
1997 int sizeSpec=0;
1998 int sizeInit=0;
1999
2000 IppiSize srcRoiSize = {.width: width, .height: height};
2001
2002 status = ippiDFTGetSize_R_32f(roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pSizeSpec: &sizeSpec, pSizeInit: &sizeInit, pSizeBuf: &sizeBuffer );
2003 if ( status < 0 )
2004 return false;
2005
2006 IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
2007
2008 if ( sizeInit > 0 )
2009 pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
2010
2011 if ( sizeBuffer > 0 )
2012 pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
2013
2014 status = ippiDFTInit_R_32f( roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pDFTSpec, pMemInit );
2015
2016 if ( sizeInit > 0 )
2017 ippFree( ptr: pMemInit );
2018
2019 if ( status < 0 )
2020 {
2021 ippFree( ptr: pDFTSpec );
2022 if ( sizeBuffer > 0 )
2023 ippFree( ptr: pBuffer );
2024 return false;
2025 }
2026
2027 if (!inv)
2028 status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_RToPack_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
2029 else
2030 status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_PackToR_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
2031
2032 if ( sizeBuffer > 0 )
2033 ippFree( ptr: pBuffer );
2034
2035 ippFree( ptr: pDFTSpec );
2036
2037 if(status >= 0)
2038 {
2039 CV_IMPL_ADD(CV_IMPL_IPP);
2040 return true;
2041 }
2042 return false;
2043}
2044
2045#endif
2046}
2047
2048#ifdef HAVE_OPENCL
2049
2050namespace cv
2051{
2052
2053enum FftType
2054{
2055 R2R = 0, // real to CCS in case forward transform, CCS to real otherwise
2056 C2R = 1, // complex to real in case inverse transform
2057 R2C = 2, // real to complex in case forward transform
2058 C2C = 3 // complex to complex
2059};
2060
2061struct OCL_FftPlan
2062{
2063private:
2064 UMat twiddles;
2065 String buildOptions;
2066 int thread_count;
2067 int dft_size;
2068 int dft_depth;
2069 bool status;
2070
2071public:
2072 OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true)
2073 {
2074 CV_Assert( dft_depth == CV_32F || dft_depth == CV_64F );
2075
2076 int min_radix;
2077 std::vector<int> radixes, blocks;
2078 ocl_getRadixes(cols: dft_size, radixes, blocks, min_radix);
2079 thread_count = dft_size / min_radix;
2080
2081 if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize())
2082 {
2083 status = false;
2084 return;
2085 }
2086
2087 // generate string with radix calls
2088 String radix_processing;
2089 int n = 1, twiddle_size = 0;
2090 for (size_t i=0; i<radixes.size(); i++)
2091 {
2092 int radix = radixes[i], block = blocks[i];
2093 if (block > 1)
2094 radix_processing += format(fmt: "fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix);
2095 else
2096 radix_processing += format(fmt: "fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix);
2097 twiddle_size += (radix-1)*n;
2098 n *= radix;
2099 }
2100
2101 twiddles.create(rows: 1, cols: twiddle_size, CV_MAKE_TYPE(dft_depth, 2));
2102 if (dft_depth == CV_32F)
2103 fillRadixTable<float>(twiddles, radixes);
2104 else
2105 fillRadixTable<double>(twiddles, radixes);
2106
2107 buildOptions = format(fmt: "-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s",
2108 dft_size, min_radix, ocl::typeToStr(t: dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, 2)),
2109 dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str());
2110 }
2111
2112 bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const
2113 {
2114 if (!status)
2115 return false;
2116
2117 UMat src = _src.getUMat();
2118 UMat dst = _dst.getUMat();
2119
2120 size_t globalsize[2];
2121 size_t localsize[2];
2122 String kernel_name;
2123
2124 bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1;
2125 bool inv = (flags & DFT_INVERSE) != 0;
2126 String options = buildOptions;
2127
2128 if (rows)
2129 {
2130 globalsize[0] = thread_count; globalsize[1] = src.rows;
2131 localsize[0] = thread_count; localsize[1] = 1;
2132 kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
2133 if ((is1d || inv) && (flags & DFT_SCALE))
2134 options += " -D DFT_SCALE";
2135 }
2136 else
2137 {
2138 globalsize[0] = num_dfts; globalsize[1] = thread_count;
2139 localsize[0] = 1; localsize[1] = thread_count;
2140 kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols";
2141 if (flags & DFT_SCALE)
2142 options += " -D DFT_SCALE";
2143 }
2144
2145 options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
2146 options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
2147 options += is1d ? " -D IS_1D" : "";
2148
2149 if (!inv)
2150 {
2151 if ((is1d && src.channels() == 1) || (rows && (fftType == R2R)))
2152 options += " -D NO_CONJUGATE";
2153 }
2154 else
2155 {
2156 if (rows && (fftType == C2R || fftType == R2R))
2157 options += " -D NO_CONJUGATE";
2158 if (dst.cols % 2 == 0)
2159 options += " -D EVEN";
2160 }
2161
2162 ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
2163 if (k.empty())
2164 return false;
2165
2166 k.args(kernel_args: ocl::KernelArg::ReadOnly(m: src), kernel_args: ocl::KernelArg::WriteOnly(m: dst), kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: twiddles), kernel_args: thread_count, kernel_args: num_dfts);
2167 return k.run(dims: 2, globalsize, localsize, sync: false);
2168 }
2169
2170private:
2171 static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
2172 {
2173 int factors[34];
2174 int nf = DFTFactorize(n: cols, factors);
2175
2176 int n = 1;
2177 int factor_index = 0;
2178 min_radix = INT_MAX;
2179
2180 // 2^n transforms
2181 if ((factors[factor_index] & 1) == 0)
2182 {
2183 for( ; n < factors[factor_index];)
2184 {
2185 int radix = 2, block = 1;
2186 if (8*n <= factors[0])
2187 radix = 8;
2188 else if (4*n <= factors[0])
2189 {
2190 radix = 4;
2191 if (cols % 12 == 0)
2192 block = 3;
2193 else if (cols % 8 == 0)
2194 block = 2;
2195 }
2196 else
2197 {
2198 if (cols % 10 == 0)
2199 block = 5;
2200 else if (cols % 8 == 0)
2201 block = 4;
2202 else if (cols % 6 == 0)
2203 block = 3;
2204 else if (cols % 4 == 0)
2205 block = 2;
2206 }
2207
2208 radixes.push_back(x: radix);
2209 blocks.push_back(x: block);
2210 min_radix = min(a: min_radix, b: block*radix);
2211 n *= radix;
2212 }
2213 factor_index++;
2214 }
2215
2216 // all the other transforms
2217 for( ; factor_index < nf; factor_index++)
2218 {
2219 int radix = factors[factor_index], block = 1;
2220 if (radix == 3)
2221 {
2222 if (cols % 12 == 0)
2223 block = 4;
2224 else if (cols % 9 == 0)
2225 block = 3;
2226 else if (cols % 6 == 0)
2227 block = 2;
2228 }
2229 else if (radix == 5)
2230 {
2231 if (cols % 10 == 0)
2232 block = 2;
2233 }
2234 radixes.push_back(x: radix);
2235 blocks.push_back(x: block);
2236 min_radix = min(a: min_radix, b: block*radix);
2237 }
2238 }
2239
2240 template <typename T>
2241 static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes)
2242 {
2243 Mat tw = twiddles.getMat(flags: ACCESS_WRITE);
2244 T* ptr = tw.ptr<T>();
2245 int ptr_index = 0;
2246
2247 int n = 1;
2248 for (size_t i=0; i<radixes.size(); i++)
2249 {
2250 int radix = radixes[i];
2251 n *= radix;
2252
2253 for (int j=1; j<radix; j++)
2254 {
2255 double theta = -CV_2PI*j/n;
2256
2257 for (int k=0; k<(n/radix); k++)
2258 {
2259 ptr[ptr_index++] = (T) cos(x: k*theta);
2260 ptr[ptr_index++] = (T) sin(x: k*theta);
2261 }
2262 }
2263 }
2264 }
2265};
2266
2267class OCL_FftPlanCache
2268{
2269public:
2270 static OCL_FftPlanCache & getInstance()
2271 {
2272 CV_SINGLETON_LAZY_INIT_REF(OCL_FftPlanCache, new OCL_FftPlanCache())
2273 }
2274
2275 Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth)
2276 {
2277 int key = (dft_size << 16) | (depth & 0xFFFF);
2278 std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(x: key);
2279 if (f != planStorage.end())
2280 {
2281 return f->second;
2282 }
2283 else
2284 {
2285 Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan(dft_size, depth));
2286 planStorage[key] = newPlan;
2287 return newPlan;
2288 }
2289 }
2290
2291 ~OCL_FftPlanCache()
2292 {
2293 planStorage.clear();
2294 }
2295
2296protected:
2297 OCL_FftPlanCache() :
2298 planStorage()
2299 {
2300 }
2301 std::map<int, Ptr<OCL_FftPlan> > planStorage;
2302};
2303
2304static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
2305{
2306 int type = _src.type(), depth = CV_MAT_DEPTH(type);
2307 Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(dft_size: _src.cols(), depth);
2308 return plan->enqueueTransform(_src, _dst, num_dfts: nonzero_rows, flags, fftType, rows: true);
2309}
2310
2311static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
2312{
2313 int type = _src.type(), depth = CV_MAT_DEPTH(type);
2314 Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(dft_size: _src.rows(), depth);
2315 return plan->enqueueTransform(_src, _dst, num_dfts: nonzero_cols, flags, fftType, rows: false);
2316}
2317
2318inline FftType determineFFTType(bool real_input, bool complex_input, bool real_output, bool complex_output, bool inv)
2319{
2320 // output format is not specified
2321 if (!real_output && !complex_output)
2322 complex_output = true;
2323
2324 // input or output format is ambiguous
2325 if (real_input == complex_input || real_output == complex_output)
2326 CV_Error(Error::StsBadArg, "Invalid FFT input or output format");
2327
2328 FftType result = real_input ? (real_output ? R2R : R2C) : (real_output ? C2R : C2C);
2329
2330 // Forward Complex to CCS not supported
2331 if (result == C2R && !inv)
2332 result = C2C;
2333
2334 // Inverse CCS to Complex not supported
2335 if (result == R2C && inv)
2336 result = R2R;
2337
2338 return result;
2339}
2340
2341static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
2342{
2343 int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
2344 Size ssize = _src.size();
2345 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2346
2347 if (!(cn == 1 || cn == 2)
2348 || !(depth == CV_32F || (depth == CV_64F && doubleSupport))
2349 || ((flags & DFT_REAL_OUTPUT) && (flags & DFT_COMPLEX_OUTPUT)))
2350 return false;
2351
2352 // if is not a multiplication of prime numbers { 2, 3, 5 }
2353 if (ssize.area() != getOptimalDFTSize(vecsize: ssize.area()))
2354 return false;
2355
2356 UMat src = _src.getUMat();
2357 bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2358
2359 if( nonzero_rows <= 0 || nonzero_rows > _src.rows() )
2360 nonzero_rows = _src.rows();
2361 bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1;
2362
2363 FftType fftType = determineFFTType(real_input: cn == 1, complex_input: cn == 2,
2364 real_output: (flags & DFT_REAL_OUTPUT) != 0, complex_output: (flags & DFT_COMPLEX_OUTPUT) != 0, inv);
2365
2366 UMat output;
2367 if (fftType == C2C || fftType == R2C)
2368 {
2369 // complex output
2370 _dst.create(sz: src.size(), CV_MAKETYPE(depth, 2));
2371 output = _dst.getUMat();
2372 }
2373 else
2374 {
2375 // real output
2376 if (is1d)
2377 {
2378 _dst.create(sz: src.size(), CV_MAKETYPE(depth, 1));
2379 output = _dst.getUMat();
2380 }
2381 else
2382 {
2383 _dst.create(sz: src.size(), CV_MAKETYPE(depth, 1));
2384 output.create(size: src.size(), CV_MAKETYPE(depth, 2));
2385 }
2386 }
2387
2388 bool result = false;
2389 if (!inv)
2390 {
2391 int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols;
2392 result = ocl_dft_rows(src: src, dst: output, nonzero_rows, flags, fftType);
2393 if (!is1d)
2394 result = result && ocl_dft_cols(src: output, _dst, nonzero_cols, flags, fftType);
2395 }
2396 else
2397 {
2398 if (fftType == C2C)
2399 {
2400 // complex output
2401 result = ocl_dft_rows(src: src, dst: output, nonzero_rows, flags, fftType);
2402 if (!is1d)
2403 result = result && ocl_dft_cols(src: output, dst: output, nonzero_cols: output.cols, flags, fftType);
2404 }
2405 else
2406 {
2407 if (is1d)
2408 {
2409 result = ocl_dft_rows(src: src, dst: output, nonzero_rows, flags, fftType);
2410 }
2411 else
2412 {
2413 int nonzero_cols = src.cols/2 + 1;
2414 result = ocl_dft_cols(src: src, dst: output, nonzero_cols, flags, fftType);
2415 result = result && ocl_dft_rows(src: output, _dst, nonzero_rows, flags, fftType);
2416 }
2417 }
2418 }
2419 return result;
2420}
2421
2422} // namespace cv;
2423
2424#endif
2425
2426#ifdef HAVE_CLAMDFFT
2427
2428namespace cv {
2429
2430#define CLAMDDFT_Assert(func) \
2431 { \
2432 clfftStatus s = (func); \
2433 CV_Assert(s == CLFFT_SUCCESS); \
2434 }
2435
2436class PlanCache
2437{
2438 struct FftPlan
2439 {
2440 FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
2441 dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
2442 doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType),
2443 context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0)
2444 {
2445 bool dft_inverse = (flags & DFT_INVERSE) != 0;
2446 bool dft_scale = (flags & DFT_SCALE) != 0;
2447 bool dft_rows = (flags & DFT_ROWS) != 0;
2448
2449 clfftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
2450 clfftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
2451
2452 size_t batchSize = dft_rows ? dft_size.height : 1;
2453 size_t clLengthsIn[3] = { (size_t)dft_size.width, dft_rows ? 1 : (size_t)dft_size.height, 1 };
2454 size_t clStridesIn[3] = { 1, 1, 1 };
2455 size_t clStridesOut[3] = { 1, 1, 1 };
2456 int elemSize = doubleFP ? sizeof(double) : sizeof(float);
2457
2458 switch (fftType)
2459 {
2460 case C2C:
2461 inLayout = CLFFT_COMPLEX_INTERLEAVED;
2462 outLayout = CLFFT_COMPLEX_INTERLEAVED;
2463 clStridesIn[1] = src_step / (elemSize << 1);
2464 clStridesOut[1] = dst_step / (elemSize << 1);
2465 break;
2466 case R2C:
2467 inLayout = CLFFT_REAL;
2468 outLayout = CLFFT_HERMITIAN_INTERLEAVED;
2469 clStridesIn[1] = src_step / elemSize;
2470 clStridesOut[1] = dst_step / (elemSize << 1);
2471 break;
2472 case C2R:
2473 inLayout = CLFFT_HERMITIAN_INTERLEAVED;
2474 outLayout = CLFFT_REAL;
2475 clStridesIn[1] = src_step / (elemSize << 1);
2476 clStridesOut[1] = dst_step / elemSize;
2477 break;
2478 case R2R:
2479 default:
2480 CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
2481 break;
2482 }
2483
2484 clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
2485 clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
2486
2487 CLAMDDFT_Assert(clfftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
2488
2489 // setting plan properties
2490 CLAMDDFT_Assert(clfftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
2491 CLAMDDFT_Assert(clfftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
2492 CLAMDDFT_Assert(clfftSetLayout(plHandle, inLayout, outLayout))
2493 CLAMDDFT_Assert(clfftSetPlanBatchSize(plHandle, batchSize))
2494 CLAMDDFT_Assert(clfftSetPlanInStride(plHandle, dim, clStridesIn))
2495 CLAMDDFT_Assert(clfftSetPlanOutStride(plHandle, dim, clStridesOut))
2496 CLAMDDFT_Assert(clfftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
2497
2498 float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
2499 CLAMDDFT_Assert(clfftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
2500
2501 // ready to bake
2502 cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2503 CLAMDDFT_Assert(clfftBakePlan(plHandle, 1, &queue, NULL, NULL))
2504 }
2505
2506 ~FftPlan()
2507 {
2508 // Do not tear down clFFT.
2509 // The user application may still use clFFT even after OpenCV is unloaded.
2510 /*clfftDestroyPlan(&plHandle);*/
2511 }
2512
2513 friend class PlanCache;
2514
2515 private:
2516 Size dft_size;
2517 int src_step, dst_step;
2518 bool doubleFP;
2519 bool inplace;
2520 int flags;
2521 FftType fftType;
2522
2523 cl_context context;
2524 clfftPlanHandle plHandle;
2525 };
2526
2527public:
2528 static PlanCache & getInstance()
2529 {
2530 CV_SINGLETON_LAZY_INIT_REF(PlanCache, new PlanCache())
2531 }
2532
2533 clfftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
2534 bool inplace, int flags, FftType fftType)
2535 {
2536 cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();
2537
2538 for (size_t i = 0, size = planStorage.size(); i < size; ++i)
2539 {
2540 const FftPlan * const plan = planStorage[i];
2541
2542 if (plan->dft_size == dft_size &&
2543 plan->flags == flags &&
2544 plan->src_step == src_step &&
2545 plan->dst_step == dst_step &&
2546 plan->doubleFP == doubleFP &&
2547 plan->fftType == fftType &&
2548 plan->inplace == inplace)
2549 {
2550 if (plan->context != currentContext)
2551 {
2552 planStorage.erase(planStorage.begin() + i);
2553 break;
2554 }
2555
2556 return plan->plHandle;
2557 }
2558 }
2559
2560 // no baked plan is found, so let's create a new one
2561 Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType));
2562 planStorage.push_back(newPlan);
2563
2564 return newPlan->plHandle;
2565 }
2566
2567 ~PlanCache()
2568 {
2569 planStorage.clear();
2570 }
2571
2572protected:
2573 PlanCache() :
2574 planStorage()
2575 {
2576 }
2577
2578 std::vector<Ptr<FftPlan> > planStorage;
2579};
2580
2581extern "C" {
2582
2583static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2584{
2585 UMatData * u = (UMatData *)p;
2586
2587 if( u && CV_XADD(&u->urefcount, -1) == 1 )
2588 u->currAllocator->deallocate(u);
2589 u = 0;
2590
2591 clReleaseEvent(e), e = 0;
2592}
2593
2594}
2595
2596static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
2597{
2598 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2599 Size ssize = _src.size();
2600
2601 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2602 if ( (!doubleSupport && depth == CV_64F) ||
2603 !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
2604 _src.offset() != 0)
2605 return false;
2606
2607 // if is not a multiplication of prime numbers { 2, 3, 5 }
2608 if (ssize.area() != getOptimalDFTSize(ssize.area()))
2609 return false;
2610
2611 int dst_complex_input = cn == 2 ? 1 : 0;
2612 bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2613 int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
2614 bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
2615
2616 CV_Assert(dft_complex_output + dft_real_output < 2);
2617 FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
2618
2619 switch (fftType)
2620 {
2621 case C2C:
2622 _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
2623 break;
2624 case R2C: // TODO implement it if possible
2625 case C2R: // TODO implement it if possible
2626 case R2R: // AMD Fft does not support this type
2627 default:
2628 return false;
2629 }
2630
2631 UMat src = _src.getUMat(), dst = _dst.getUMat();
2632 bool inplace = src.u == dst.u;
2633
2634 clfftPlanHandle plHandle = PlanCache::getInstance().
2635 getPlanHandle(ssize, (int)src.step, (int)dst.step,
2636 depth == CV_64F, inplace, flags, fftType);
2637
2638 // get the bufferSize
2639 size_t bufferSize = 0;
2640 CLAMDDFT_Assert(clfftGetTmpBufSize(plHandle, &bufferSize))
2641 UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
2642
2643 cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
2644 cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
2645
2646 cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2647 cl_event e = 0;
2648
2649 CLAMDDFT_Assert(clfftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
2650 1, &queue, 0, NULL, &e,
2651 &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
2652
2653 tmpBuffer.addref();
2654 clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
2655 return true;
2656}
2657
2658#undef DFT_ASSERT
2659
2660}
2661
2662#endif // HAVE_CLAMDFFT
2663
2664namespace cv
2665{
2666
2667template <typename T>
2668static void complementComplex(T * ptr, size_t step, int n, int len, int dft_dims)
2669{
2670 T* p0 = (T*)ptr;
2671 size_t dstep = step/sizeof(p0[0]);
2672 for(int i = 0; i < len; i++ )
2673 {
2674 T* p = p0 + dstep*i;
2675 T* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
2676
2677 for( int j = 1; j < (n+1)/2; j++ )
2678 {
2679 p[(n-j)*2] = q[j*2];
2680 p[(n-j)*2+1] = -q[j*2+1];
2681 }
2682 }
2683}
2684
2685static void complementComplexOutput(int depth, uchar * ptr, size_t step, int count, int len, int dft_dims)
2686{
2687 if( depth == CV_32F )
2688 complementComplex(ptr: (float*)ptr, step, n: count, len, dft_dims);
2689 else
2690 complementComplex(ptr: (double*)ptr, step, n: count, len, dft_dims);
2691}
2692
2693enum DftMode {
2694 InvalidDft = 0,
2695 FwdRealToCCS,
2696 FwdRealToComplex,
2697 FwdComplexToComplex,
2698 InvCCSToReal,
2699 InvComplexToReal,
2700 InvComplexToComplex,
2701};
2702
2703enum DftDims {
2704 InvalidDim = 0,
2705 OneDim,
2706 OneDimColWise,
2707 TwoDims
2708};
2709
2710inline const char * modeName(DftMode m)
2711{
2712 switch (m)
2713 {
2714 case InvalidDft: return "InvalidDft";
2715 case FwdRealToCCS: return "FwdRealToCCS";
2716 case FwdRealToComplex: return "FwdRealToComplex";
2717 case FwdComplexToComplex: return "FwdComplexToComplex";
2718 case InvCCSToReal: return "InvCCSToReal";
2719 case InvComplexToReal: return "InvComplexToReal";
2720 case InvComplexToComplex: return "InvComplexToComplex";
2721 }
2722 return 0;
2723}
2724
2725inline const char * dimsName(DftDims d)
2726{
2727 switch (d)
2728 {
2729 case InvalidDim: return "InvalidDim";
2730 case OneDim: return "OneDim";
2731 case OneDimColWise: return "OneDimColWise";
2732 case TwoDims: return "TwoDims";
2733 };
2734 return 0;
2735}
2736
2737template <typename T>
2738inline bool isInv(T mode)
2739{
2740 switch ((DftMode)mode)
2741 {
2742 case InvCCSToReal:
2743 case InvComplexToReal:
2744 case InvComplexToComplex: return true;
2745 default: return false;
2746 }
2747}
2748
2749inline DftMode determineMode(bool inv, int cn1, int cn2)
2750{
2751 if (!inv)
2752 {
2753 if (cn1 == 1 && cn2 == 1)
2754 return FwdRealToCCS;
2755 else if (cn1 == 1 && cn2 == 2)
2756 return FwdRealToComplex;
2757 else if (cn1 == 2 && cn2 == 2)
2758 return FwdComplexToComplex;
2759 }
2760 else
2761 {
2762 if (cn1 == 1 && cn2 == 1)
2763 return InvCCSToReal;
2764 else if (cn1 == 2 && cn2 == 1)
2765 return InvComplexToReal;
2766 else if (cn1 == 2 && cn2 == 2)
2767 return InvComplexToComplex;
2768 }
2769 return InvalidDft;
2770}
2771
2772
2773inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuous)
2774{
2775 // printf("%d x %d (%d, %d)\n", rows, cols, isRowWise, isContinuous);
2776 if (isRowWise)
2777 return OneDim;
2778 if (cols == 1 && rows > 1) // one-column-shaped input
2779 {
2780 if (isContinuous)
2781 return OneDim;
2782 else
2783 return OneDimColWise;
2784 }
2785 if (rows == 1)
2786 return OneDim;
2787 if (cols > 1 && rows > 1)
2788 return TwoDims;
2789 return InvalidDim;
2790}
2791
2792class OcvDftImpl CV_FINAL : public hal::DFT2D
2793{
2794protected:
2795 Ptr<hal::DFT1D> contextA;
2796 Ptr<hal::DFT1D> contextB;
2797 bool needBufferA;
2798 bool needBufferB;
2799 bool inv;
2800 int width;
2801 int height;
2802 DftMode mode;
2803 int elem_size;
2804 int complex_elem_size;
2805 int depth;
2806 bool real_transform;
2807 int nonzero_rows;
2808 bool isRowTransform;
2809 bool isScaled;
2810 std::vector<int> stages;
2811 bool useIpp;
2812 int src_channels;
2813 int dst_channels;
2814
2815 AutoBuffer<uchar> tmp_bufA;
2816 AutoBuffer<uchar> tmp_bufB;
2817 AutoBuffer<uchar> buf0;
2818 AutoBuffer<uchar> buf1;
2819
2820public:
2821 OcvDftImpl()
2822 {
2823 needBufferA = false;
2824 needBufferB = false;
2825 inv = false;
2826 width = 0;
2827 height = 0;
2828 mode = InvalidDft;
2829 elem_size = 0;
2830 complex_elem_size = 0;
2831 depth = 0;
2832 real_transform = false;
2833 nonzero_rows = 0;
2834 isRowTransform = false;
2835 isScaled = false;
2836 useIpp = false;
2837 src_channels = 0;
2838 dst_channels = 0;
2839 }
2840
2841 void init(int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows)
2842 {
2843 bool isComplex = _src_channels != _dst_channels;
2844 nonzero_rows = _nonzero_rows;
2845 width = _width;
2846 height = _height;
2847 depth = _depth;
2848 src_channels = _src_channels;
2849 dst_channels = _dst_channels;
2850 bool isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
2851 bool isInplace = (flags & CV_HAL_DFT_IS_INPLACE) != 0;
2852 bool isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
2853 mode = determineMode(inv: isInverse, cn1: _src_channels, cn2: _dst_channels);
2854 inv = isInverse;
2855 isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
2856 isScaled = (flags & CV_HAL_DFT_SCALE) != 0;
2857 needBufferA = false;
2858 needBufferB = false;
2859 real_transform = (mode != FwdComplexToComplex && mode != InvComplexToComplex);
2860
2861 elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
2862 complex_elem_size = elem_size * 2;
2863 if( !real_transform )
2864 elem_size = complex_elem_size;
2865
2866#if defined USE_IPP_DFT
2867 CV_IPP_CHECK()
2868 {
2869 if (nonzero_rows == 0 && depth == CV_32F && ((width * height)>(int)(1<<6)))
2870 {
2871 if (mode == FwdComplexToComplex || mode == InvComplexToComplex || mode == FwdRealToCCS || mode == InvCCSToReal)
2872 {
2873 useIpp = true;
2874 return;
2875 }
2876 }
2877 }
2878#endif
2879
2880 DftDims dims = determineDims(rows: height, cols: width, isRowWise: isRowTransform, isContinuous);
2881 if (dims == TwoDims)
2882 {
2883 stages.resize(new_size: 2);
2884 if (mode == InvCCSToReal || mode == InvComplexToReal)
2885 {
2886 stages[0] = 1;
2887 stages[1] = 0;
2888 }
2889 else
2890 {
2891 stages[0] = 0;
2892 stages[1] = 1;
2893 }
2894 }
2895 else
2896 {
2897 stages.resize(new_size: 1);
2898 if (dims == OneDimColWise)
2899 stages[0] = 1;
2900 else
2901 stages[0] = 0;
2902 }
2903
2904 for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
2905 {
2906 if (stageIndex == 1)
2907 {
2908 isInplace = true;
2909 isComplex = false;
2910 }
2911
2912 int stage = stages[stageIndex];
2913 bool isLastStage = (stageIndex + 1 == stages.size());
2914
2915 int len, count;
2916
2917 int f = 0;
2918 if (inv)
2919 f |= CV_HAL_DFT_INVERSE;
2920 if (isScaled)
2921 f |= CV_HAL_DFT_SCALE;
2922 if (isRowTransform)
2923 f |= CV_HAL_DFT_ROWS;
2924 if (isComplex)
2925 f |= CV_HAL_DFT_COMPLEX_OUTPUT;
2926 if (real_transform)
2927 f |= CV_HAL_DFT_REAL_OUTPUT;
2928 if (!isLastStage)
2929 f |= CV_HAL_DFT_TWO_STAGE;
2930
2931 if( stage == 0 ) // row-wise transform
2932 {
2933 if (width == 1 && !isRowTransform )
2934 {
2935 len = height;
2936 count = width;
2937 }
2938 else
2939 {
2940 len = width;
2941 count = height;
2942 }
2943 needBufferA = isInplace;
2944 contextA = hal::DFT1D::create(len, count, depth, flags: f, useBuffer: &needBufferA);
2945 if (needBufferA)
2946 tmp_bufA.allocate(size: len * complex_elem_size);
2947 }
2948 else
2949 {
2950 len = height;
2951 count = width;
2952 f |= CV_HAL_DFT_STAGE_COLS;
2953 needBufferB = isInplace;
2954 contextB = hal::DFT1D::create(len, count, depth, flags: f, useBuffer: &needBufferB);
2955 if (needBufferB)
2956 tmp_bufB.allocate(size: len * complex_elem_size);
2957
2958 buf0.allocate(size: len * complex_elem_size);
2959 buf1.allocate(size: len * complex_elem_size);
2960 }
2961 }
2962 }
2963
2964 void apply(const uchar * src, size_t src_step, uchar * dst, size_t dst_step) CV_OVERRIDE
2965 {
2966#if defined USE_IPP_DFT
2967 if (useIpp)
2968 {
2969 int ipp_norm_flag = !isScaled ? 8 : inv ? 2 : 1;
2970 if (!isRowTransform)
2971 {
2972 if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
2973 {
2974 if (ippi_DFT_C_32F(src, src_step, dst, dst_step, width, height, inv, norm_flag: ipp_norm_flag))
2975 {
2976 CV_IMPL_ADD(CV_IMPL_IPP);
2977 return;
2978 }
2979 setIppErrorStatus();
2980 }
2981 else if (mode == FwdRealToCCS || mode == InvCCSToReal)
2982 {
2983 if (ippi_DFT_R_32F(src, src_step, dst, dst_step, width, height, inv, norm_flag: ipp_norm_flag))
2984 {
2985 CV_IMPL_ADD(CV_IMPL_IPP);
2986 return;
2987 }
2988 setIppErrorStatus();
2989 }
2990 }
2991 else
2992 {
2993 if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
2994 {
2995 ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
2996 if (Dft_C_IPPLoop(src, src_step, dst, dst_step, width, height, ippidft: IPPDFT_C_Functor(ippiFunc),norm_flag: ipp_norm_flag))
2997 {
2998 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2999 return;
3000 }
3001 setIppErrorStatus();
3002 }
3003 else if (mode == FwdRealToCCS || mode == InvCCSToReal)
3004 {
3005 ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
3006 if (Dft_R_IPPLoop(src, src_step, dst, dst_step, width, height, ippidft: IPPDFT_R_Functor(ippiFunc),norm_flag: ipp_norm_flag))
3007 {
3008 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
3009 return;
3010 }
3011 setIppErrorStatus();
3012 }
3013 }
3014 return;
3015 }
3016#endif
3017
3018 for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
3019 {
3020 int stage_src_channels = src_channels;
3021 int stage_dst_channels = dst_channels;
3022
3023 if (stageIndex == 1)
3024 {
3025 src = dst;
3026 src_step = dst_step;
3027 stage_src_channels = stage_dst_channels;
3028 }
3029
3030 int stage = stages[stageIndex];
3031 bool isLastStage = (stageIndex + 1 == stages.size());
3032 bool isComplex = stage_src_channels != stage_dst_channels;
3033
3034 if( stage == 0 )
3035 rowDft(src_data: src, src_step, dst_data: dst, dst_step, isComplex, isLastStage);
3036 else
3037 colDft(src_data: src, src_step, dst_data: dst, dst_step, stage_src_channels, stage_dst_channels, isLastStage);
3038 }
3039 }
3040
3041protected:
3042
3043 void rowDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, bool isComplex, bool isLastStage)
3044 {
3045 int len, count;
3046 if (width == 1 && !isRowTransform )
3047 {
3048 len = height;
3049 count = width;
3050 }
3051 else
3052 {
3053 len = width;
3054 count = height;
3055 }
3056 int dptr_offset = 0;
3057 int dst_full_len = len*elem_size;
3058
3059 if( needBufferA )
3060 {
3061 if (mode == FwdRealToCCS && (len & 1) && len > 1)
3062 dptr_offset = elem_size;
3063 }
3064
3065 if( !inv && isComplex )
3066 dst_full_len += (len & 1) ? elem_size : complex_elem_size;
3067
3068 int nz = nonzero_rows;
3069 if( nz <= 0 || nz > count )
3070 nz = count;
3071
3072 int i;
3073 for( i = 0; i < nz; i++ )
3074 {
3075 const uchar* sptr = src_data + src_step * i;
3076 uchar* dptr0 = dst_data + dst_step * i;
3077 uchar* dptr = dptr0;
3078
3079 if( needBufferA )
3080 dptr = tmp_bufA.data();
3081
3082 contextA->apply(src: sptr, dst: dptr);
3083
3084 if( needBufferA )
3085 memcpy( dest: dptr0, src: dptr + dptr_offset, n: dst_full_len );
3086 }
3087
3088 for( ; i < count; i++ )
3089 {
3090 uchar* dptr0 = dst_data + dst_step * i;
3091 memset( s: dptr0, c: 0, n: dst_full_len );
3092 }
3093 if(isLastStage && mode == FwdRealToComplex)
3094 complementComplexOutput(depth, ptr: dst_data, step: dst_step, count: len, len: nz, dft_dims: 1);
3095 }
3096
3097 void colDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage)
3098 {
3099 int len = height;
3100 int count = width;
3101 int a = 0, b = count;
3102 uchar *dbuf0, *dbuf1;
3103 const uchar* sptr0 = src_data;
3104 uchar* dptr0 = dst_data;
3105
3106 dbuf0 = buf0.data(), dbuf1 = buf1.data();
3107
3108 if( needBufferB )
3109 {
3110 dbuf1 = tmp_bufB.data();
3111 dbuf0 = buf1.data();
3112 }
3113
3114 if( real_transform )
3115 {
3116 int even;
3117 a = 1;
3118 even = (count & 1) == 0;
3119 b = (count+1)/2;
3120 if( !inv )
3121 {
3122 memset( s: buf0.data(), c: 0, n: len*complex_elem_size );
3123 CopyColumn( src: sptr0, src_step, dst: buf0.data(), dst_step: complex_elem_size, len, elem_size );
3124 sptr0 += stage_dst_channels*elem_size;
3125 if( even )
3126 {
3127 memset( s: buf1.data(), c: 0, n: len*complex_elem_size );
3128 CopyColumn( src: sptr0 + (count-2)*elem_size, src_step,
3129 dst: buf1.data(), dst_step: complex_elem_size, len, elem_size );
3130 }
3131 }
3132 else if( stage_src_channels == 1 )
3133 {
3134 CopyColumn( src: sptr0, src_step, dst: buf0.data(), dst_step: elem_size, len, elem_size );
3135 ExpandCCS( ptr: buf0.data(), n: len, elem_size );
3136 if( even )
3137 {
3138 CopyColumn( src: sptr0 + (count-1)*elem_size, src_step,
3139 dst: buf1.data(), dst_step: elem_size, len, elem_size );
3140 ExpandCCS( ptr: buf1.data(), n: len, elem_size );
3141 }
3142 sptr0 += elem_size;
3143 }
3144 else
3145 {
3146 CopyColumn( src: sptr0, src_step, dst: buf0.data(), dst_step: complex_elem_size, len, elem_size: complex_elem_size );
3147 if( even )
3148 {
3149 CopyColumn( src: sptr0 + b*complex_elem_size, src_step,
3150 dst: buf1.data(), dst_step: complex_elem_size, len, elem_size: complex_elem_size );
3151 }
3152 sptr0 += complex_elem_size;
3153 }
3154
3155 if( even )
3156 contextB->apply(src: buf1.data(), dst: dbuf1);
3157 contextB->apply(src: buf0.data(), dst: dbuf0);
3158
3159 if( stage_dst_channels == 1 )
3160 {
3161 if( !inv )
3162 {
3163 // copy the half of output vector to the first/last column.
3164 // before doing that, defgragment the vector
3165 memcpy( dest: dbuf0 + elem_size, src: dbuf0, n: elem_size );
3166 CopyColumn( src: dbuf0 + elem_size, src_step: elem_size, dst: dptr0,
3167 dst_step, len, elem_size );
3168 if( even )
3169 {
3170 memcpy( dest: dbuf1 + elem_size, src: dbuf1, n: elem_size );
3171 CopyColumn( src: dbuf1 + elem_size, src_step: elem_size,
3172 dst: dptr0 + (count-1)*elem_size,
3173 dst_step, len, elem_size );
3174 }
3175 dptr0 += elem_size;
3176 }
3177 else
3178 {
3179 // copy the real part of the complex vector to the first/last column
3180 CopyColumn( src: dbuf0, src_step: complex_elem_size, dst: dptr0, dst_step, len, elem_size );
3181 if( even )
3182 CopyColumn( src: dbuf1, src_step: complex_elem_size, dst: dptr0 + (count-1)*elem_size,
3183 dst_step, len, elem_size );
3184 dptr0 += elem_size;
3185 }
3186 }
3187 else
3188 {
3189 CV_Assert( !inv );
3190 CopyColumn( src: dbuf0, src_step: complex_elem_size, dst: dptr0,
3191 dst_step, len, elem_size: complex_elem_size );
3192 if( even )
3193 CopyColumn( src: dbuf1, src_step: complex_elem_size,
3194 dst: dptr0 + b*complex_elem_size,
3195 dst_step, len, elem_size: complex_elem_size );
3196 dptr0 += complex_elem_size;
3197 }
3198 }
3199
3200 for(int i = a; i < b; i += 2 )
3201 {
3202 if( i+1 < b )
3203 {
3204 CopyFrom2Columns( src: sptr0, src_step, dst0: buf0.data(), dst1: buf1.data(), len, elem_size: complex_elem_size );
3205 contextB->apply(src: buf1.data(), dst: dbuf1);
3206 }
3207 else
3208 CopyColumn( src: sptr0, src_step, dst: buf0.data(), dst_step: complex_elem_size, len, elem_size: complex_elem_size );
3209
3210 contextB->apply(src: buf0.data(), dst: dbuf0);
3211
3212 if( i+1 < b )
3213 CopyTo2Columns( src0: dbuf0, src1: dbuf1, dst: dptr0, dst_step, len, elem_size: complex_elem_size );
3214 else
3215 CopyColumn( src: dbuf0, src_step: complex_elem_size, dst: dptr0, dst_step, len, elem_size: complex_elem_size );
3216 sptr0 += 2*complex_elem_size;
3217 dptr0 += 2*complex_elem_size;
3218 }
3219 if(isLastStage && mode == FwdRealToComplex)
3220 complementComplexOutput(depth, ptr: dst_data, step: dst_step, count, len, dft_dims: 2);
3221 }
3222};
3223
3224class OcvDftBasicImpl CV_FINAL : public hal::DFT1D
3225{
3226public:
3227 OcvDftOptions opt;
3228 int _factors[34];
3229 AutoBuffer<uchar> wave_buf;
3230 AutoBuffer<int> itab_buf;
3231#ifdef USE_IPP_DFT
3232 AutoBuffer<uchar> ippbuf;
3233 AutoBuffer<uchar> ippworkbuf;
3234#endif
3235
3236public:
3237 OcvDftBasicImpl()
3238 {
3239 opt.factors = _factors;
3240 }
3241 void init(int len, int count, int depth, int flags, bool *needBuffer)
3242 {
3243 int prev_len = opt.n;
3244
3245 int stage = (flags & CV_HAL_DFT_STAGE_COLS) != 0 ? 1 : 0;
3246 int complex_elem_size = depth == CV_32F ? sizeof(Complex<float>) : sizeof(Complex<double>);
3247 opt.isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
3248 bool real_transform = (flags & CV_HAL_DFT_REAL_OUTPUT) != 0;
3249 opt.isComplex = (stage == 0) && (flags & CV_HAL_DFT_COMPLEX_OUTPUT) != 0;
3250 bool needAnotherStage = (flags & CV_HAL_DFT_TWO_STAGE) != 0;
3251
3252 opt.scale = 1;
3253 opt.tab_size = len;
3254 opt.n = len;
3255
3256 opt.useIpp = false;
3257 #ifdef USE_IPP_DFT
3258 opt.ipp_spec = 0;
3259 opt.ipp_work = 0;
3260
3261 if( CV_IPP_CHECK_COND && (opt.n*count >= 64) ) // use IPP DFT if available
3262 {
3263 int ipp_norm_flag = (flags & CV_HAL_DFT_SCALE) == 0 ? 8 : opt.isInverse ? 2 : 1;
3264 int specsize=0, initsize=0, worksize=0;
3265 IppDFTGetSizeFunc getSizeFunc = 0;
3266 IppDFTInitFunc initFunc = 0;
3267
3268 if( real_transform && stage == 0 )
3269 {
3270 if( depth == CV_32F )
3271 {
3272 getSizeFunc = ippsDFTGetSize_R_32f;
3273 initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
3274 }
3275 else
3276 {
3277 getSizeFunc = ippsDFTGetSize_R_64f;
3278 initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
3279 }
3280 }
3281 else
3282 {
3283 if( depth == CV_32F )
3284 {
3285 getSizeFunc = ippsDFTGetSize_C_32fc;
3286 initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
3287 }
3288 else
3289 {
3290 getSizeFunc = ippsDFTGetSize_C_64fc;
3291 initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
3292 }
3293 }
3294 if( getSizeFunc(opt.n, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
3295 {
3296 ippbuf.allocate(size: specsize + initsize + 64);
3297 opt.ipp_spec = alignPtr(ptr: &ippbuf[0], n: 32);
3298 ippworkbuf.allocate(size: worksize + 32);
3299 opt.ipp_work = alignPtr(ptr: &ippworkbuf[0], n: 32);
3300 uchar* initbuf = alignPtr(ptr: (uchar*)opt.ipp_spec + specsize, n: 32);
3301 if( initFunc(opt.n, ipp_norm_flag, ippAlgHintNone, opt.ipp_spec, initbuf) >= 0 )
3302 opt.useIpp = true;
3303 }
3304 else
3305 setIppErrorStatus();
3306 }
3307 #endif
3308
3309 if (!opt.useIpp)
3310 {
3311 if (len != prev_len)
3312 {
3313 opt.nf = DFTFactorize( n: opt.n, factors: opt.factors );
3314 }
3315 bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
3316 if (len != prev_len || (!inplace_transform && opt.isInverse && real_transform))
3317 {
3318 wave_buf.allocate(size: opt.n*complex_elem_size);
3319 opt.wave = wave_buf.data();
3320 itab_buf.allocate(size: opt.n);
3321 opt.itab = itab_buf.data();
3322 DFTInit( n0: opt.n, nf: opt.nf, factors: opt.factors, itab: opt.itab, elem_size: complex_elem_size,
3323 wave: opt.wave, inv_itab: stage == 0 && opt.isInverse && real_transform );
3324 }
3325 // otherwise reuse the tables calculated on the previous stage
3326 if (needBuffer)
3327 {
3328 if( (stage == 0 && ((*needBuffer && !inplace_transform) || (real_transform && (len & 1)))) ||
3329 (stage == 1 && !inplace_transform) )
3330 {
3331 *needBuffer = true;
3332 }
3333 }
3334 }
3335 else
3336 {
3337 if (needBuffer)
3338 {
3339 *needBuffer = false;
3340 }
3341 }
3342
3343 {
3344 static DFTFunc dft_tbl[6] =
3345 {
3346 (DFTFunc)DFT_32f,
3347 (DFTFunc)RealDFT_32f,
3348 (DFTFunc)CCSIDFT_32f,
3349 (DFTFunc)DFT_64f,
3350 (DFTFunc)RealDFT_64f,
3351 (DFTFunc)CCSIDFT_64f
3352 };
3353 int idx = 0;
3354 if (stage == 0)
3355 {
3356 if (real_transform)
3357 {
3358 if (!opt.isInverse)
3359 idx = 1;
3360 else
3361 idx = 2;
3362 }
3363 }
3364 if (depth == CV_64F)
3365 idx += 3;
3366
3367 opt.dft_func = dft_tbl[idx];
3368 }
3369
3370 if(!needAnotherStage && (flags & CV_HAL_DFT_SCALE) != 0)
3371 {
3372 int rowCount = count;
3373 if (stage == 0 && (flags & CV_HAL_DFT_ROWS) != 0)
3374 rowCount = 1;
3375 opt.scale = 1./(len * rowCount);
3376 }
3377 }
3378
3379 void apply(const uchar *src, uchar *dst) CV_OVERRIDE
3380 {
3381 opt.dft_func(opt, src, dst);
3382 }
3383
3384 void free() {}
3385};
3386
3387struct ReplacementDFT1D : public hal::DFT1D
3388{
3389 cvhalDFT *context;
3390 bool isInitialized;
3391
3392 ReplacementDFT1D() : context(0), isInitialized(false) {}
3393 bool init(int len, int count, int depth, int flags, bool *needBuffer)
3394 {
3395 int res = cv_hal_dftInit1D(context: &context, len, count, depth, flags, needBuffer);
3396 isInitialized = (res == CV_HAL_ERROR_OK);
3397 return isInitialized;
3398 }
3399 void apply(const uchar *src, uchar *dst) CV_OVERRIDE
3400 {
3401 if (isInitialized)
3402 {
3403 CALL_HAL(dft1D, cv_hal_dft1D, context, src, dst);
3404 }
3405 }
3406 ~ReplacementDFT1D()
3407 {
3408 if (isInitialized)
3409 {
3410 CALL_HAL(dftFree1D, cv_hal_dftFree1D, context);
3411 }
3412 }
3413};
3414
3415struct ReplacementDFT2D : public hal::DFT2D
3416{
3417 cvhalDFT *context;
3418 bool isInitialized;
3419
3420 ReplacementDFT2D() : context(0), isInitialized(false) {}
3421 bool init(int width, int height, int depth,
3422 int src_channels, int dst_channels,
3423 int flags, int nonzero_rows)
3424 {
3425 int res = cv_hal_dftInit2D(context: &context, width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
3426 isInitialized = (res == CV_HAL_ERROR_OK);
3427 return isInitialized;
3428 }
3429 void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) CV_OVERRIDE
3430 {
3431 if (isInitialized)
3432 {
3433 CALL_HAL(dft2D, cv_hal_dft2D, context, src, src_step, dst, dst_step);
3434 }
3435 }
3436 ~ReplacementDFT2D()
3437 {
3438 if (isInitialized)
3439 {
3440 CALL_HAL(dftFree2D, cv_hal_dftFree1D, context);
3441 }
3442 }
3443};
3444
3445namespace hal {
3446
3447//================== 1D ======================
3448
3449Ptr<DFT1D> DFT1D::create(int len, int count, int depth, int flags, bool *needBuffer)
3450{
3451 {
3452 ReplacementDFT1D *impl = new ReplacementDFT1D();
3453 if (impl->init(len, count, depth, flags, needBuffer))
3454 {
3455 return Ptr<DFT1D>(impl);
3456 }
3457 delete impl;
3458 }
3459 {
3460 OcvDftBasicImpl *impl = new OcvDftBasicImpl();
3461 impl->init(len, count, depth, flags, needBuffer);
3462 return Ptr<DFT1D>(impl);
3463 }
3464}
3465
3466//================== 2D ======================
3467
3468Ptr<DFT2D> DFT2D::create(int width, int height, int depth,
3469 int src_channels, int dst_channels,
3470 int flags, int nonzero_rows)
3471{
3472 {
3473 ReplacementDFT2D *impl = new ReplacementDFT2D();
3474 if (impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows))
3475 {
3476 return Ptr<DFT2D>(impl);
3477 }
3478 delete impl;
3479 }
3480 {
3481 if(width == 1 && nonzero_rows > 0 )
3482 {
3483 CV_Error( cv::Error::StsNotImplemented,
3484 "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
3485 "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
3486 }
3487 OcvDftImpl *impl = new OcvDftImpl();
3488 impl->init(width: width, height: height, depth: depth, src_channels: src_channels, dst_channels: dst_channels, flags, nonzero_rows: nonzero_rows);
3489 return Ptr<DFT2D>(impl);
3490 }
3491}
3492
3493} // cv::hal::
3494} // cv::
3495
3496
3497void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
3498{
3499 CV_INSTRUMENT_REGION();
3500
3501#ifdef HAVE_CLAMDFFT
3502 CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
3503 _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
3504 ocl_dft_amdfft(_src0, _dst, flags))
3505#endif
3506
3507#ifdef HAVE_OPENCL
3508 CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
3509 ocl_dft(src: _src0, _dst, flags, nonzero_rows))
3510#endif
3511
3512 Mat src0 = _src0.getMat(), src = src0;
3513 bool inv = (flags & DFT_INVERSE) != 0;
3514 int type = src.type();
3515 int depth = src.depth();
3516
3517 CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3518
3519 // Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
3520 CV_Assert( !((flags & DFT_COMPLEX_INPUT) && src.channels() != 2) );
3521
3522 if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
3523 _dst.create( sz: src.size(), CV_MAKETYPE(depth, 2) );
3524 else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
3525 _dst.create( sz: src.size(), type: depth );
3526 else
3527 _dst.create( sz: src.size(), type );
3528
3529 Mat dst = _dst.getMat();
3530
3531 int f = 0;
3532 if (src.isContinuous() && dst.isContinuous())
3533 f |= CV_HAL_DFT_IS_CONTINUOUS;
3534 if (inv)
3535 f |= CV_HAL_DFT_INVERSE;
3536 if (flags & DFT_ROWS)
3537 f |= CV_HAL_DFT_ROWS;
3538 if (flags & DFT_SCALE)
3539 f |= CV_HAL_DFT_SCALE;
3540 if (src.data == dst.data)
3541 f |= CV_HAL_DFT_IS_INPLACE;
3542 Ptr<hal::DFT2D> c = hal::DFT2D::create(width: src.cols, height: src.rows, depth, src_channels: src.channels(), dst_channels: dst.channels(), flags: f, nonzero_rows);
3543 c->apply(src_data: src.data, src_step: src.step, dst_data: dst.data, dst_step: dst.step);
3544}
3545
3546
3547void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
3548{
3549 CV_INSTRUMENT_REGION();
3550
3551 dft( src0: src, dst: dst, flags: flags | DFT_INVERSE, nonzero_rows );
3552}
3553
3554#ifdef HAVE_OPENCL
3555
3556namespace cv {
3557
3558static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
3559 OutputArray _dst, int flags, bool conjB )
3560{
3561 int atype = _srcA.type(), btype = _srcB.type(),
3562 rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
3563 Size asize = _srcA.size(), bsize = _srcB.size();
3564 CV_Assert(asize == bsize);
3565
3566 if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 )
3567 return false;
3568
3569 UMat A = _srcA.getUMat(), B = _srcB.getUMat();
3570 CV_Assert(A.size() == B.size());
3571
3572 _dst.create(sz: A.size(), type: atype);
3573 UMat dst = _dst.getUMat();
3574
3575 ocl::Kernel k("mulAndScaleSpectrums",
3576 ocl::core::mulspectrums_oclsrc,
3577 format("%s", conjB ? "-D CONJ" : ""));
3578 if (k.empty())
3579 return false;
3580
3581 k.args(kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: A), kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: B),
3582 kernel_args: ocl::KernelArg::WriteOnly(m: dst), kernel_args: rowsPerWI);
3583
3584 size_t globalsize[2] = { (size_t)asize.width, ((size_t)asize.height + rowsPerWI - 1) / rowsPerWI };
3585 return k.run(dims: 2, globalsize, NULL, sync: false);
3586}
3587
3588}
3589
3590#endif
3591
3592namespace {
3593
3594#define VAL(buf, elem) (((T*)((char*)data ## buf + (step ## buf * (elem))))[0])
3595#define MUL_SPECTRUMS_COL(A, B, C) \
3596 VAL(C, 0) = VAL(A, 0) * VAL(B, 0); \
3597 for (size_t j = 1; j <= rows - 2; j += 2) \
3598 { \
3599 double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3600 double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3601 if (conjB) b_im = -b_im; \
3602 double c_re = a_re * b_re - a_im * b_im; \
3603 double c_im = a_re * b_im + a_im * b_re; \
3604 VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3605 } \
3606 if ((rows & 1) == 0) \
3607 VAL(C, rows-1) = VAL(A, rows-1) * VAL(B, rows-1)
3608
3609template <typename T, bool conjB> static inline
3610void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3611{
3612 MUL_SPECTRUMS_COL(A, B, C);
3613}
3614
3615template <typename T, bool conjB> static inline
3616void mulSpectrums_processCol_inplaceA(const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
3617{
3618 MUL_SPECTRUMS_COL(AC, B, AC);
3619}
3620template <typename T, bool conjB, bool inplaceA> static inline
3621void mulSpectrums_processCol(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3622{
3623 if (inplaceA)
3624 mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
3625 else
3626 mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3627}
3628#undef MUL_SPECTRUMS_COL
3629#undef VAL
3630
3631template <typename T, bool conjB, bool inplaceA> static inline
3632void mulSpectrums_processCols(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
3633{
3634 mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3635 if ((cols & 1) == 0)
3636 {
3637 mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - 1, dataB + cols - 1, dataC + cols - 1, stepA, stepB, stepC, rows);
3638 }
3639}
3640
3641#define VAL(buf, elem) (data ## buf[(elem)])
3642#define MUL_SPECTRUMS_ROW(A, B, C) \
3643 for (size_t j = j0; j < j1; j += 2) \
3644 { \
3645 double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3646 double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3647 if (conjB) b_im = -b_im; \
3648 double c_re = a_re * b_re - a_im * b_im; \
3649 double c_im = a_re * b_im + a_im * b_re; \
3650 VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3651 }
3652template <typename T, bool conjB> static inline
3653void mulSpectrums_processRow_noinplace(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3654{
3655 MUL_SPECTRUMS_ROW(A, B, C);
3656}
3657template <typename T, bool conjB> static inline
3658void mulSpectrums_processRow_inplaceA(const T* dataB, T* dataAC, size_t j0, size_t j1)
3659{
3660 MUL_SPECTRUMS_ROW(AC, B, AC);
3661}
3662template <typename T, bool conjB, bool inplaceA> static inline
3663void mulSpectrums_processRow(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3664{
3665 if (inplaceA)
3666 mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
3667 else
3668 mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
3669}
3670#undef MUL_SPECTRUMS_ROW
3671#undef VAL
3672
3673template <typename T, bool conjB, bool inplaceA> static inline
3674void mulSpectrums_processRows(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
3675{
3676 while (rows-- > 0)
3677 {
3678 if (is_1d_CN1)
3679 dataC[0] = dataA[0]*dataB[0];
3680 mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
3681 if (is_1d_CN1 && (cols & 1) == 0)
3682 dataC[j1] = dataA[j1]*dataB[j1];
3683
3684 dataA = (const T*)(((char*)dataA) + stepA);
3685 dataB = (const T*)(((char*)dataB) + stepB);
3686 dataC = (T*)(((char*)dataC) + stepC);
3687 }
3688}
3689
3690
3691template <typename T, bool conjB, bool inplaceA> static inline
3692void mulSpectrums_Impl_(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3693{
3694 if (!is_1d && isCN1)
3695 {
3696 mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
3697 }
3698 mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
3699}
3700template <typename T, bool conjB> static inline
3701void mulSpectrums_Impl(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3702{
3703 if (dataA == dataC)
3704 mulSpectrums_Impl_<T, conjB, true>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3705 else
3706 mulSpectrums_Impl_<T, conjB, false>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3707}
3708
3709} // namespace
3710
3711void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
3712 OutputArray _dst, int flags, bool conjB )
3713{
3714 CV_INSTRUMENT_REGION();
3715
3716 CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= 2 && _srcB.dims() <= 2,
3717 ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
3718
3719 Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
3720 int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
3721 size_t rows = srcA.rows, cols = srcA.cols;
3722
3723 CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
3724 CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3725
3726 _dst.create( rows: srcA.rows, cols: srcA.cols, type );
3727 Mat dst = _dst.getMat();
3728
3729 // correct inplace support
3730 // Case 'dst.data == srcA.data' is handled by implementation,
3731 // because it is used frequently (filter2D, matchTemplate)
3732 if (dst.data == srcB.data)
3733 srcB = srcB.clone(); // workaround for B only
3734
3735 bool is_1d = (flags & DFT_ROWS)
3736 || (rows == 1)
3737 || (cols == 1 && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous());
3738
3739 if( is_1d && !(flags & DFT_ROWS) )
3740 cols = cols + rows - 1, rows = 1;
3741
3742 bool isCN1 = cn == 1;
3743 size_t j0 = isCN1 ? 1 : 0;
3744 size_t j1 = cols*cn - (((cols & 1) == 0 && cn == 1) ? 1 : 0);
3745
3746 if (depth == CV_32F)
3747 {
3748 const float* dataA = srcA.ptr<float>();
3749 const float* dataB = srcB.ptr<float>();
3750 float* dataC = dst.ptr<float>();
3751 if (!conjB)
3752 mulSpectrums_Impl<float, false>(dataA, dataB, dataC, stepA: srcA.step, stepB: srcB.step, stepC: dst.step, rows, cols, j0, j1, is_1d, isCN1);
3753 else
3754 mulSpectrums_Impl<float, true>(dataA, dataB, dataC, stepA: srcA.step, stepB: srcB.step, stepC: dst.step, rows, cols, j0, j1, is_1d, isCN1);
3755 }
3756 else
3757 {
3758 const double* dataA = srcA.ptr<double>();
3759 const double* dataB = srcB.ptr<double>();
3760 double* dataC = dst.ptr<double>();
3761 if (!conjB)
3762 mulSpectrums_Impl<double, false>(dataA, dataB, dataC, stepA: srcA.step, stepB: srcB.step, stepC: dst.step, rows, cols, j0, j1, is_1d, isCN1);
3763 else
3764 mulSpectrums_Impl<double, true>(dataA, dataB, dataC, stepA: srcA.step, stepB: srcB.step, stepC: dst.step, rows, cols, j0, j1, is_1d, isCN1);
3765 }
3766}
3767
3768
3769/****************************************************************************************\
3770 Discrete Cosine Transform
3771\****************************************************************************************/
3772
3773namespace cv
3774{
3775
3776/* DCT is calculated using DFT, as described here:
3777 http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
3778*/
3779template<typename T> static void
3780DCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3781 const Complex<T>* dct_wave )
3782{
3783 static const T sin_45 = (T)0.70710678118654752440084436210485;
3784
3785 int n = c.n;
3786 int j, n2 = n >> 1;
3787
3788 src_step /= sizeof(src[0]);
3789 dst_step /= sizeof(dst[0]);
3790 T* dst1 = dst + (n-1)*dst_step;
3791
3792 if( n == 1 )
3793 {
3794 dst[0] = src[0];
3795 return;
3796 }
3797
3798 for( j = 0; j < n2; j++, src += src_step*2 )
3799 {
3800 dft_src[j] = src[0];
3801 dft_src[n-j-1] = src[src_step];
3802 }
3803
3804 RealDFT(c, dft_src, dft_dst);
3805 src = dft_dst;
3806
3807 dst[0] = (T)(src[0]*dct_wave->re*sin_45);
3808 dst += dst_step;
3809 for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3810 dst += dst_step, dst1 -= dst_step )
3811 {
3812 T t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2];
3813 T t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2];
3814 dst[0] = t0;
3815 dst1[0] = t1;
3816 }
3817
3818 dst[0] = src[n-1]*dct_wave->re;
3819}
3820
3821
3822template<typename T> static void
3823IDCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3824 const Complex<T>* dct_wave)
3825{
3826 static const T sin_45 = (T)0.70710678118654752440084436210485;
3827 int n = c.n;
3828 int j, n2 = n >> 1;
3829
3830 src_step /= sizeof(src[0]);
3831 dst_step /= sizeof(dst[0]);
3832 const T* src1 = src + (n-1)*src_step;
3833
3834 if( n == 1 )
3835 {
3836 dst[0] = src[0];
3837 return;
3838 }
3839
3840 dft_src[0] = (T)(src[0]*2*dct_wave->re*sin_45);
3841 src += src_step;
3842 for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3843 src += src_step, src1 -= src_step )
3844 {
3845 T t0 = dct_wave->re*src[0] - dct_wave->im*src1[0];
3846 T t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0];
3847 dft_src[j*2-1] = t0;
3848 dft_src[j*2] = t1;
3849 }
3850
3851 dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
3852 CCSIDFT(c, dft_src, dft_dst);
3853
3854 for( j = 0; j < n2; j++, dst += dst_step*2 )
3855 {
3856 dst[0] = dft_dst[j];
3857 dst[dst_step] = dft_dst[n-j-1];
3858 }
3859}
3860
3861
3862static void
3863DCTInit( int n, int elem_size, void* _wave, int inv )
3864{
3865 static const double DctScale[] =
3866 {
3867 0.707106781186547570, 0.500000000000000000, 0.353553390593273790,
3868 0.250000000000000000, 0.176776695296636890, 0.125000000000000000,
3869 0.088388347648318447, 0.062500000000000000, 0.044194173824159223,
3870 0.031250000000000000, 0.022097086912079612, 0.015625000000000000,
3871 0.011048543456039806, 0.007812500000000000, 0.005524271728019903,
3872 0.003906250000000000, 0.002762135864009952, 0.001953125000000000,
3873 0.001381067932004976, 0.000976562500000000, 0.000690533966002488,
3874 0.000488281250000000, 0.000345266983001244, 0.000244140625000000,
3875 0.000172633491500622, 0.000122070312500000, 0.000086316745750311,
3876 0.000061035156250000, 0.000043158372875155, 0.000030517578125000
3877 };
3878
3879 int i;
3880 Complex<double> w, w1;
3881 double t, scale;
3882
3883 if( n == 1 )
3884 return;
3885
3886 CV_Assert( (n&1) == 0 );
3887
3888 if( (n & (n - 1)) == 0 )
3889 {
3890 int m;
3891 for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
3892 ;
3893 scale = (!inv ? 2 : 1)*DctScale[m];
3894 w1.re = DFTTab[m+2][0];
3895 w1.im = -DFTTab[m+2][1];
3896 }
3897 else
3898 {
3899 t = 1./(2*n);
3900 scale = (!inv ? 2 : 1)*std::sqrt(x: t);
3901 w1.im = sin(x: -CV_PI*t);
3902 w1.re = std::sqrt(x: 1. - w1.im*w1.im);
3903 }
3904 n >>= 1;
3905
3906 if( elem_size == sizeof(Complex<double>) )
3907 {
3908 Complex<double>* wave = (Complex<double>*)_wave;
3909
3910 w.re = scale;
3911 w.im = 0.;
3912
3913 for( i = 0; i <= n; i++ )
3914 {
3915 wave[i] = w;
3916 t = w.re*w1.re - w.im*w1.im;
3917 w.im = w.re*w1.im + w.im*w1.re;
3918 w.re = t;
3919 }
3920 }
3921 else
3922 {
3923 Complex<float>* wave = (Complex<float>*)_wave;
3924 CV_Assert( elem_size == sizeof(Complex<float>) );
3925
3926 w.re = (float)scale;
3927 w.im = 0.f;
3928
3929 for( i = 0; i <= n; i++ )
3930 {
3931 wave[i].re = (float)w.re;
3932 wave[i].im = (float)w.im;
3933 t = w.re*w1.re - w.im*w1.im;
3934 w.im = w.re*w1.im + w.im*w1.re;
3935 w.re = t;
3936 }
3937 }
3938}
3939
3940
3941typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, size_t src_step, void* dft_src,
3942 void* dft_dst, void* dst, size_t dst_step, const void* dct_wave);
3943
3944static void DCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3945 float* dst, size_t dst_step, const Complexf* dct_wave)
3946{
3947 DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3948}
3949
3950static void IDCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3951 float* dst, size_t dst_step, const Complexf* dct_wave)
3952{
3953 IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3954}
3955
3956static void DCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3957 double* dst, size_t dst_step, const Complexd* dct_wave)
3958{
3959 DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3960}
3961
3962static void IDCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3963 double* dst, size_t dst_step, const Complexd* dct_wave)
3964{
3965 IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3966}
3967
3968}
3969
3970#ifdef HAVE_IPP
3971namespace cv
3972{
3973
3974#if IPP_VERSION_X100 >= 900
3975typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer);
3976typedef IppStatus (CV_STDCALL * ippiDCTInit)(void* pDCTSpec, IppiSize roiSize, Ipp8u* pMemInit );
3977typedef IppStatus (CV_STDCALL * ippiDCTGetSize)(IppiSize roiSize, int* pSizeSpec, int* pSizeInit, int* pSizeBuf);
3978#elif IPP_VERSION_X100 >= 700
3979typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
3980typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
3981typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
3982typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
3983#endif
3984
3985class DctIPPLoop_Invoker : public ParallelLoopBody
3986{
3987public:
3988 DctIPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width, bool _inv, bool *_ok) :
3989 ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok)
3990 {
3991 *ok = true;
3992 }
3993
3994 virtual void operator()(const Range& range) const CV_OVERRIDE
3995 {
3996 if(*ok == false)
3997 return;
3998
3999#if IPP_VERSION_X100 >= 900
4000 IppiSize srcRoiSize = {.width: width, .height: 1};
4001
4002 int specSize = 0;
4003 int initSize = 0;
4004 int bufferSize = 0;
4005
4006 Ipp8u* pDCTSpec = NULL;
4007 Ipp8u* pBuffer = NULL;
4008 Ipp8u* pInitBuf = NULL;
4009
4010 #define IPP_RETURN \
4011 if(pDCTSpec) \
4012 ippFree(pDCTSpec); \
4013 if(pBuffer) \
4014 ippFree(pBuffer); \
4015 if(pInitBuf) \
4016 ippFree(pInitBuf); \
4017 return;
4018
4019 ippiDCTFunc ippiDCT_32f_C1R = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4020 ippiDCTInit ippDctInit = inv ? (ippiDCTInit)ippiDCTInvInit_32f : (ippiDCTInit)ippiDCTFwdInit_32f;
4021 ippiDCTGetSize ippDctGetSize = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
4022
4023 if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
4024 {
4025 *ok = false;
4026 return;
4027 }
4028
4029 pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
4030 if(!pDCTSpec && specSize)
4031 {
4032 *ok = false;
4033 return;
4034 }
4035
4036 pBuffer = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
4037 if(!pBuffer && bufferSize)
4038 {
4039 *ok = false;
4040 IPP_RETURN
4041 }
4042 pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
4043 if(!pInitBuf && initSize)
4044 {
4045 *ok = false;
4046 IPP_RETURN
4047 }
4048
4049 if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
4050 {
4051 *ok = false;
4052 IPP_RETURN
4053 }
4054
4055 for(int i = range.start; i < range.end; ++i)
4056 {
4057 if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float*)(src + src_step * i), static_cast<int>(src_step), (float*)(dst + dst_step * i), static_cast<int>(dst_step), pDCTSpec, pBuffer) < 0)
4058 {
4059 *ok = false;
4060 IPP_RETURN
4061 }
4062 }
4063 IPP_RETURN
4064#undef IPP_RETURN
4065#elif IPP_VERSION_X100 >= 700
4066 void* pDCTSpec;
4067 AutoBuffer<uchar> buf;
4068 uchar* pBuffer = 0;
4069 int bufSize=0;
4070
4071 IppiSize srcRoiSize = {width, 1};
4072
4073 CV_SUPPRESS_DEPRECATED_START
4074
4075 ippiDCTFunc ippDctFun = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4076 ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4077 ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
4078 ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4079
4080 if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
4081 {
4082 buf.allocate( bufSize );
4083 pBuffer = (uchar*)buf;
4084
4085 for( int i = range.start; i < range.end; ++i)
4086 {
4087 if(ippDctFun((float*)(src + src_step * i), static_cast<int>(src_step), (float*)(dst + dst_step * i), static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer) < 0)
4088 {
4089 *ok = false;
4090 break;
4091 }
4092 }
4093 }
4094 else
4095 *ok = false;
4096
4097 if (pDCTSpec)
4098 ippFree(pDCTSpec);
4099
4100 CV_SUPPRESS_DEPRECATED_END
4101#else
4102 CV_UNUSED(range);
4103 *ok = false;
4104#endif
4105 }
4106
4107private:
4108 const uchar * src;
4109 size_t src_step;
4110 uchar * dst;
4111 size_t dst_step;
4112 int width;
4113 bool inv;
4114 bool *ok;
4115};
4116
4117static bool DctIPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv)
4118{
4119 bool ok;
4120 parallel_for_(range: Range(0, height), body: DctIPPLoop_Invoker(src, src_step, dst, dst_step, width, inv, &ok), nstripes: height/(double)(1<<4) );
4121 return ok;
4122}
4123
4124static bool ippi_DCT_32f(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, bool row)
4125{
4126 CV_INSTRUMENT_REGION_IPP();
4127
4128 if(row)
4129 return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv);
4130 else
4131 {
4132#if IPP_VERSION_X100 >= 900
4133 IppiSize srcRoiSize = {.width: width, .height: height};
4134
4135 int specSize = 0;
4136 int initSize = 0;
4137 int bufferSize = 0;
4138
4139 Ipp8u* pDCTSpec = NULL;
4140 Ipp8u* pBuffer = NULL;
4141 Ipp8u* pInitBuf = NULL;
4142
4143 #define IPP_RELEASE \
4144 if(pDCTSpec) \
4145 ippFree(pDCTSpec); \
4146 if(pBuffer) \
4147 ippFree(pBuffer); \
4148 if(pInitBuf) \
4149 ippFree(pInitBuf); \
4150
4151 ippiDCTFunc ippiDCT_32f_C1R = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4152 ippiDCTInit ippDctInit = inv ? (ippiDCTInit)ippiDCTInvInit_32f : (ippiDCTInit)ippiDCTFwdInit_32f;
4153 ippiDCTGetSize ippDctGetSize = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
4154
4155 if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
4156 return false;
4157
4158 pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
4159 if(!pDCTSpec && specSize)
4160 return false;
4161
4162 pBuffer = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
4163 if(!pBuffer && bufferSize)
4164 {
4165 IPP_RELEASE
4166 return false;
4167 }
4168 pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
4169 if(!pInitBuf && initSize)
4170 {
4171 IPP_RELEASE
4172 return false;
4173 }
4174
4175 if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
4176 {
4177 IPP_RELEASE
4178 return false;
4179 }
4180
4181 if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDCTSpec, pBuffer) < 0)
4182 {
4183 IPP_RELEASE
4184 return false;
4185 }
4186
4187 IPP_RELEASE
4188 return true;
4189#undef IPP_RELEASE
4190#elif IPP_VERSION_X100 >= 700
4191 IppStatus status;
4192 void* pDCTSpec;
4193 AutoBuffer<uchar> buf;
4194 uchar* pBuffer = 0;
4195 int bufSize=0;
4196
4197 IppiSize srcRoiSize = {width, height};
4198
4199 CV_SUPPRESS_DEPRECATED_START
4200
4201 ippiDCTFunc ippDctFun = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4202 ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4203 ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
4204 ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4205
4206 status = ippStsErr;
4207
4208 if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
4209 {
4210 buf.allocate( bufSize );
4211 pBuffer = (uchar*)buf;
4212
4213 status = ippDctFun((float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer);
4214 }
4215
4216 if (pDCTSpec)
4217 ippFree(pDCTSpec);
4218
4219 CV_SUPPRESS_DEPRECATED_END
4220
4221 return status >= 0;
4222#else
4223 CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(inv); CV_UNUSED(row);
4224 return false;
4225#endif
4226 }
4227}
4228}
4229#endif
4230
4231namespace cv {
4232
4233class OcvDctImpl CV_FINAL : public hal::DCT2D
4234{
4235public:
4236 OcvDftOptions opt;
4237
4238 int _factors[34];
4239 AutoBuffer<uint> wave_buf;
4240 AutoBuffer<int> itab_buf;
4241
4242 DCTFunc dct_func;
4243 bool isRowTransform;
4244 bool isInverse;
4245 bool isContinuous;
4246 int start_stage;
4247 int end_stage;
4248 int width;
4249 int height;
4250 int depth;
4251
4252 void init(int _width, int _height, int _depth, int flags)
4253 {
4254 width = _width;
4255 height = _height;
4256 depth = _depth;
4257 isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
4258 isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
4259 isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
4260 static DCTFunc dct_tbl[4] =
4261 {
4262 (DCTFunc)DCT_32f,
4263 (DCTFunc)IDCT_32f,
4264 (DCTFunc)DCT_64f,
4265 (DCTFunc)IDCT_64f
4266 };
4267 dct_func = dct_tbl[(int)isInverse + (depth == CV_64F)*2];
4268 opt.nf = 0;
4269 opt.isComplex = false;
4270 opt.isInverse = false;
4271 opt.noPermute = false;
4272 opt.scale = 1.;
4273 opt.factors = _factors;
4274
4275 if (isRowTransform || height == 1 || (width == 1 && isContinuous))
4276 {
4277 start_stage = end_stage = 0;
4278 }
4279 else
4280 {
4281 start_stage = (width == 1);
4282 end_stage = 1;
4283 }
4284 }
4285 void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) CV_OVERRIDE
4286 {
4287 CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform))
4288
4289 AutoBuffer<uchar> dct_wave;
4290 AutoBuffer<uchar> src_buf, dst_buf;
4291 uchar *src_dft_buf = 0, *dst_dft_buf = 0;
4292 int prev_len = 0;
4293 int elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
4294 int complex_elem_size = elem_size*2;
4295
4296 for(int stage = start_stage ; stage <= end_stage; stage++ )
4297 {
4298 const uchar* sptr = src;
4299 uchar* dptr = dst;
4300 size_t sstep0, sstep1, dstep0, dstep1;
4301 int len, count;
4302
4303 if( stage == 0 )
4304 {
4305 len = width;
4306 count = height;
4307 if( len == 1 && !isRowTransform )
4308 {
4309 len = height;
4310 count = 1;
4311 }
4312 sstep0 = src_step;
4313 dstep0 = dst_step;
4314 sstep1 = dstep1 = elem_size;
4315 }
4316 else
4317 {
4318 len = height;
4319 count = width;
4320 sstep1 = src_step;
4321 dstep1 = dst_step;
4322 sstep0 = dstep0 = elem_size;
4323 }
4324
4325 opt.n = len;
4326 opt.tab_size = len;
4327
4328 if( len != prev_len )
4329 {
4330 if( len > 1 && (len & 1) )
4331 CV_Error( cv::Error::StsNotImplemented, "Odd-size DCT\'s are not implemented" );
4332
4333 opt.nf = DFTFactorize( n: len, factors: opt.factors );
4334 bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
4335
4336 wave_buf.allocate(size: len*complex_elem_size);
4337 opt.wave = wave_buf.data();
4338 itab_buf.allocate(size: len);
4339 opt.itab = itab_buf.data();
4340 DFTInit( n0: len, nf: opt.nf, factors: opt.factors, itab: opt.itab, elem_size: complex_elem_size, wave: opt.wave, inv_itab: isInverse );
4341
4342 dct_wave.allocate(size: (len/2 + 1)*complex_elem_size);
4343 src_buf.allocate(size: len*elem_size);
4344 src_dft_buf = src_buf.data();
4345 if(!inplace_transform)
4346 {
4347 dst_buf.allocate(size: len*elem_size);
4348 dst_dft_buf = dst_buf.data();
4349 }
4350 else
4351 {
4352 dst_dft_buf = src_buf.data();
4353 }
4354 DCTInit( n: len, elem_size: complex_elem_size, wave: dct_wave.data(), inv: isInverse);
4355 prev_len = len;
4356 }
4357 // otherwise reuse the tables calculated on the previous stage
4358 for(unsigned i = 0; i < static_cast<unsigned>(count); i++ )
4359 {
4360 dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf,
4361 dptr + i*dstep0, dstep1, dct_wave.data());
4362 }
4363 src = dst;
4364 src_step = dst_step;
4365 }
4366 }
4367};
4368
4369struct ReplacementDCT2D : public hal::DCT2D
4370{
4371 cvhalDFT *context;
4372 bool isInitialized;
4373
4374 ReplacementDCT2D() : context(0), isInitialized(false) {}
4375 bool init(int width, int height, int depth, int flags)
4376 {
4377 int res = cv_hal_dctInit2D(context: &context, width, height, depth, flags);
4378 isInitialized = (res == CV_HAL_ERROR_OK);
4379 return isInitialized;
4380 }
4381 void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) CV_OVERRIDE
4382 {
4383 if (isInitialized)
4384 {
4385 CALL_HAL(dct2D, cv_hal_dct2D, context, src_data, src_step, dst_data, dst_step);
4386 }
4387 }
4388 ~ReplacementDCT2D()
4389 {
4390 if (isInitialized)
4391 {
4392 CALL_HAL(dctFree2D, cv_hal_dctFree2D, context);
4393 }
4394 }
4395};
4396
4397namespace hal {
4398
4399Ptr<DCT2D> DCT2D::create(int width, int height, int depth, int flags)
4400{
4401 {
4402 ReplacementDCT2D *impl = new ReplacementDCT2D();
4403 if (impl->init(width, height, depth, flags))
4404 {
4405 return Ptr<DCT2D>(impl);
4406 }
4407 delete impl;
4408 }
4409 {
4410 OcvDctImpl *impl = new OcvDctImpl();
4411 impl->init(width: width, height: height, depth: depth, flags);
4412 return Ptr<DCT2D>(impl);
4413 }
4414}
4415
4416} // cv::hal::
4417} // cv::
4418
4419void cv::dct( InputArray _src0, OutputArray _dst, int flags )
4420{
4421 CV_INSTRUMENT_REGION();
4422
4423 Mat src0 = _src0.getMat(), src = src0;
4424 int type = src.type(), depth = src.depth();
4425
4426 CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
4427 _dst.create( rows: src.rows, cols: src.cols, type );
4428 Mat dst = _dst.getMat();
4429
4430 int f = 0;
4431 if ((flags & DFT_ROWS) != 0)
4432 f |= CV_HAL_DFT_ROWS;
4433 if ((flags & DCT_INVERSE) != 0)
4434 f |= CV_HAL_DFT_INVERSE;
4435 if (src.isContinuous() && dst.isContinuous())
4436 f |= CV_HAL_DFT_IS_CONTINUOUS;
4437
4438 Ptr<hal::DCT2D> c = hal::DCT2D::create(width: src.cols, height: src.rows, depth, flags: f);
4439 c->apply(src_data: src.data, src_step: src.step, dst_data: dst.data, dst_step: dst.step);
4440}
4441
4442
4443void cv::idct( InputArray src, OutputArray dst, int flags )
4444{
4445 CV_INSTRUMENT_REGION();
4446
4447 dct( src0: src, dst: dst, flags: flags | DCT_INVERSE );
4448}
4449
4450namespace cv
4451{
4452
4453static const int optimalDFTSizeTab[] = {
44541, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48,
445550, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160,
4456162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375,
4457384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720,
4458729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200,
44591215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875,
44601920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592,
44612700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840,
44623888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400,
44635625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290,
44647500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000,
446510125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500,
446612800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000,
446716200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683,
446820000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576,
446925000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720,
447031104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864,
447137500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080,
447246656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675,
447355296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800,
447465536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125,
447578732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312,
447693750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000,
4477109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000,
4478128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456,
4479150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888,
4480168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400,
4481196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000,
4482230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000,
4483259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912,
4484295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050,
4485331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000,
4486388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000,
4487437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520,
4488492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750,
4489552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080,
4490625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125,
4491708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432,
4492787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736,
4493885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150,
4494995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500,
44951105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000,
44961215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720,
44971312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000,
44981458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500,
44991572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616,
45001687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240,
45011875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000,
45022025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000,
45032211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960,
45042400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000,
45052592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360,
45062812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500,
45073072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800,
45083280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940,
45093600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000,
45103906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200,
45114194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976,
45124500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000,
45134860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000,
45145242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000,
45155668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000,
45166144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000,
45176553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250,
45187077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272,
45197593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000,
45208100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000,
45218748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000,
45229437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280,
452310000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832,
452410628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000,
452511664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875,
452612441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200,
452713122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500,
452814155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544,
452915187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000,
453016200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000,
453117496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400,
453218750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800,
453319683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520,
453420995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880,
453522500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872,
453623914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240,
453725312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856,
453827000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000,
453929296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000,
454031250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000,
454132805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000,
454235156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800,
454337500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600,
454439366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000,
454541943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750,
454644789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200,
454747775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000,
454850331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375,
454953084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104,
455056687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000,
455160466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250,
455263700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864,
455367184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800,
455471663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720,
455575937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150,
455680000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000,
455784934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000,
455890699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488,
455995659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000,
4560100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600,
4561104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000,
4562110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000,
4563117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000,
4564122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984,
4565127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728,
4566134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760,
4567141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200,
4568150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000,
4569157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000,
4570163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120,
4571170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000,
4572181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800,
4573189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000,
4574199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000,
4575205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848,
4576216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160,
4577227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450,
4578240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000,
4579251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000,
4580262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792,
4581273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464,
4582288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888,
4583302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800,
4584314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000,
4585328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240,
4586341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000,
4587362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600,
4588379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000,
4589398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000,
4590410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696,
4591432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832,
4592453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375,
4593477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000,
4594497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000,
4595512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912,
4596537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000,
4597566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000,
4598590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032,
4599614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920,
4600637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250,
4601671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000,
4602703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875,
4603737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904,
4604765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400,
4605797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000,
4606829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392,
4607864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664,
4608906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000,
4609949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000,
4610984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000,
46111019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000,
46121054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168,
46131093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800,
46141139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000,
46151194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125,
46161224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000,
46171265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000,
46181312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000,
46191358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600,
46201417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750,
46211474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000,
46221528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000,
46231574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360,
46241620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600,
46251679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000,
46261749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328,
46271813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800,
46281889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000,
46291953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920,
46302015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000,
46312097152000, 2099520000, 2109375000, 2123366400, 2125764000
4632};
4633
4634}
4635
4636int cv::getOptimalDFTSize( int size0 )
4637{
4638 int a = 0, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[0]) - 1;
4639 if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] )
4640 return -1;
4641
4642 while( a < b )
4643 {
4644 int c = (a + b) >> 1;
4645 if( size0 <= optimalDFTSizeTab[c] )
4646 b = c;
4647 else
4648 a = c+1;
4649 }
4650
4651 return optimalDFTSizeTab[b];
4652}
4653
4654
4655#ifndef OPENCV_EXCLUDE_C_API
4656
4657CV_IMPL void
4658cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
4659{
4660 cv::Mat src = cv::cvarrToMat(arr: srcarr), dst0 = cv::cvarrToMat(arr: dstarr), dst = dst0;
4661 int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : 0) |
4662 ((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : 0) |
4663 ((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0);
4664
4665 CV_Assert( src.size == dst.size );
4666
4667 if( src.type() != dst.type() )
4668 {
4669 if( dst.channels() == 2 )
4670 _flags |= cv::DFT_COMPLEX_OUTPUT;
4671 else
4672 _flags |= cv::DFT_REAL_OUTPUT;
4673 }
4674
4675 cv::dft( src0: src, dst: dst, flags: _flags, nonzero_rows );
4676 CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect
4677}
4678
4679
4680CV_IMPL void
4681cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
4682 CvArr* dstarr, int flags )
4683{
4684 cv::Mat srcA = cv::cvarrToMat(arr: srcAarr),
4685 srcB = cv::cvarrToMat(arr: srcBarr),
4686 dst = cv::cvarrToMat(arr: dstarr);
4687 CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() );
4688
4689 cv::mulSpectrums(srcA: srcA, srcB: srcB, dst: dst,
4690 flags: (flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0,
4691 conjB: (flags & CV_DXT_MUL_CONJ) != 0 );
4692}
4693
4694
4695CV_IMPL void
4696cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
4697{
4698 cv::Mat src = cv::cvarrToMat(arr: srcarr), dst = cv::cvarrToMat(arr: dstarr);
4699 CV_Assert( src.size == dst.size && src.type() == dst.type() );
4700 int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : 0) |
4701 ((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : 0);
4702 cv::dct( src0: src, dst: dst, flags: _flags );
4703}
4704
4705
4706CV_IMPL int
4707cvGetOptimalDFTSize( int size0 )
4708{
4709 return cv::getOptimalDFTSize(size0);
4710}
4711
4712#endif // OPENCV_EXCLUDE_C_API
4713/* End of file. */
4714

source code of opencv/modules/core/src/dxt.cpp