dxt.cpp source code [opencv/modules/core/src/dxt.cpp]

1	/M///////////////////////////////////////////////////////////////////////////////////////*
2	//
3	// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4	//
5	// By downloading, copying, installing or using the software you agree to this license.
6	// If you do not agree to this license, do not download, install,
7	// copy or use the software.
8	//
9	//
10	// Intel License Agreement
11	// For Open Source Computer Vision Library
12	//
13	// Copyright (C) 2000, Intel Corporation, all rights reserved.
14	// Third party copyrights are property of their respective owners.
15	//
16	// Redistribution and use in source and binary forms, with or without modification,
17	// are permitted provided that the following conditions are met:
18	//
19	// Redistribution's of source code must retain the above copyright notice,*
20	// this list of conditions and the following disclaimer.
21	//
22	// Redistribution's in binary form must reproduce the above copyright notice,*
23	// this list of conditions and the following disclaimer in the documentation
24	// and/or other materials provided with the distribution.
25	//
26	// The name of Intel Corporation may not be used to endorse or promote products*
27	// derived from this software without specific prior written permission.
28	//
29	// This software is provided by the copyright holders and contributors "as is" and
30	// any express or implied warranties, including, but not limited to, the implied
31	// warranties of merchantability and fitness for a particular purpose are disclaimed.
32	// In no event shall the Intel Corporation or contributors be liable for any direct,
33	// indirect, incidental, special, exemplary, or consequential damages
34	// (including, but not limited to, procurement of substitute goods or services;
35	// loss of use, data, or profits; or business interruption) however caused
36	// and on any theory of liability, whether in contract, strict liability,
37	// or tort (including negligence or otherwise) arising in any way out of
38	// the use of this software, even if advised of the possibility of such damage.
39	//
40	//M/*
41
42	#include "precomp.hpp"
43	#include "opencv2/core/opencl/runtime/opencl_clfft.hpp"
44	#include "opencv2/core/opencl/runtime/opencl_core.hpp"
45	#include "opencl_kernels_core.hpp"
46	#include <map>
47
48	namespace cv
49	{
50
51	// On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010)
52	#if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600
53	# pragma optimize("", off)
54	# pragma warning(disable: 4748)
55	#endif
56
57	#if IPP_VERSION_X100 >= 710
58	#define USE_IPP_DFT 1
59	#else
60	#undef USE_IPP_DFT
61	#endif
62
63	/**************************************************************************************\
64	Discrete Fourier Transform
65	\**************************************************************************************/
66
67	static unsigned char bitrevTab[] =
68	{
69	`0x00`,`0x80`,`0x40`,`0xc0`,`0x20`,`0xa0`,`0x60`,`0xe0`,`0x10`,`0x90`,`0x50`,`0xd0`,`0x30`,`0xb0`,`0x70`,`0xf0`,
70	`0x08`,`0x88`,`0x48`,`0xc8`,`0x28`,`0xa8`,`0x68`,`0xe8`,`0x18`,`0x98`,`0x58`,`0xd8`,`0x38`,`0xb8`,`0x78`,`0xf8`,
71	`0x04`,`0x84`,`0x44`,`0xc4`,`0x24`,`0xa4`,`0x64`,`0xe4`,`0x14`,`0x94`,`0x54`,`0xd4`,`0x34`,`0xb4`,`0x74`,`0xf4`,
72	`0x0c`,`0x8c`,`0x4c`,`0xcc`,`0x2c`,`0xac`,`0x6c`,`0xec`,`0x1c`,`0x9c`,`0x5c`,`0xdc`,`0x3c`,`0xbc`,`0x7c`,`0xfc`,
73	`0x02`,`0x82`,`0x42`,`0xc2`,`0x22`,`0xa2`,`0x62`,`0xe2`,`0x12`,`0x92`,`0x52`,`0xd2`,`0x32`,`0xb2`,`0x72`,`0xf2`,
74	`0x0a`,`0x8a`,`0x4a`,`0xca`,`0x2a`,`0xaa`,`0x6a`,`0xea`,`0x1a`,`0x9a`,`0x5a`,`0xda`,`0x3a`,`0xba`,`0x7a`,`0xfa`,
75	`0x06`,`0x86`,`0x46`,`0xc6`,`0x26`,`0xa6`,`0x66`,`0xe6`,`0x16`,`0x96`,`0x56`,`0xd6`,`0x36`,`0xb6`,`0x76`,`0xf6`,
76	`0x0e`,`0x8e`,`0x4e`,`0xce`,`0x2e`,`0xae`,`0x6e`,`0xee`,`0x1e`,`0x9e`,`0x5e`,`0xde`,`0x3e`,`0xbe`,`0x7e`,`0xfe`,
77	`0x01`,`0x81`,`0x41`,`0xc1`,`0x21`,`0xa1`,`0x61`,`0xe1`,`0x11`,`0x91`,`0x51`,`0xd1`,`0x31`,`0xb1`,`0x71`,`0xf1`,
78	`0x09`,`0x89`,`0x49`,`0xc9`,`0x29`,`0xa9`,`0x69`,`0xe9`,`0x19`,`0x99`,`0x59`,`0xd9`,`0x39`,`0xb9`,`0x79`,`0xf9`,
79	`0x05`,`0x85`,`0x45`,`0xc5`,`0x25`,`0xa5`,`0x65`,`0xe5`,`0x15`,`0x95`,`0x55`,`0xd5`,`0x35`,`0xb5`,`0x75`,`0xf5`,
80	`0x0d`,`0x8d`,`0x4d`,`0xcd`,`0x2d`,`0xad`,`0x6d`,`0xed`,`0x1d`,`0x9d`,`0x5d`,`0xdd`,`0x3d`,`0xbd`,`0x7d`,`0xfd`,
81	`0x03`,`0x83`,`0x43`,`0xc3`,`0x23`,`0xa3`,`0x63`,`0xe3`,`0x13`,`0x93`,`0x53`,`0xd3`,`0x33`,`0xb3`,`0x73`,`0xf3`,
82	`0x0b`,`0x8b`,`0x4b`,`0xcb`,`0x2b`,`0xab`,`0x6b`,`0xeb`,`0x1b`,`0x9b`,`0x5b`,`0xdb`,`0x3b`,`0xbb`,`0x7b`,`0xfb`,
83	`0x07`,`0x87`,`0x47`,`0xc7`,`0x27`,`0xa7`,`0x67`,`0xe7`,`0x17`,`0x97`,`0x57`,`0xd7`,`0x37`,`0xb7`,`0x77`,`0xf7`,
84	`0x0f`,`0x8f`,`0x4f`,`0xcf`,`0x2f`,`0xaf`,`0x6f`,`0xef`,`0x1f`,`0x9f`,`0x5f`,`0xdf`,`0x3f`,`0xbf`,`0x7f`,`0xff`
85	};
86
87	static const double DFTTab[][`2`] =
88	{
89	{ `1.00000000000000000`, `0.00000000000000000` },
90	{-`1.00000000000000000`, `0.00000000000000000` },
91	{ `0.00000000000000000`, `1.00000000000000000` },
92	{ `0.70710678118654757`, `0.70710678118654746` },
93	{ `0.92387953251128674`, `0.38268343236508978` },
94	{ `0.98078528040323043`, `0.19509032201612825` },
95	{ `0.99518472667219693`, `0.09801714032956060` },
96	{ `0.99879545620517241`, `0.04906767432741802` },
97	{ `0.99969881869620425`, `0.02454122852291229` },
98	{ `0.99992470183914450`, `0.01227153828571993` },
99	{ `0.99998117528260111`, `0.00613588464915448` },
100	{ `0.99999529380957619`, `0.00306795676296598` },
101	{ `0.99999882345170188`, `0.00153398018628477` },
102	{ `0.99999970586288223`, `0.00076699031874270` },
103	{ `0.99999992646571789`, `0.00038349518757140` },
104	{ `0.99999998161642933`, `0.00019174759731070` },
105	{ `0.99999999540410733`, `0.00009587379909598` },
106	{ `0.99999999885102686`, `0.00004793689960307` },
107	{ `0.99999999971275666`, `0.00002396844980842` },
108	{ `0.99999999992818922`, `0.00001198422490507` },
109	{ `0.99999999998204725`, `0.00000599211245264` },
110	{ `0.99999999999551181`, `0.00000299605622633` },
111	{ `0.99999999999887801`, `0.00000149802811317` },
112	{ `0.99999999999971945`, `0.00000074901405658` },
113	{ `0.99999999999992983`, `0.00000037450702829` },
114	{ `0.99999999999998246`, `0.00000018725351415` },
115	{ `0.99999999999999567`, `0.00000009362675707` },
116	{ `0.99999999999999889`, `0.00000004681337854` },
117	{ `0.99999999999999978`, `0.00000002340668927` },
118	{ `0.99999999999999989`, `0.00000001170334463` },
119	{ `1.00000000000000000`, `0.00000000585167232` },
120	{ `1.00000000000000000`, `0.00000000292583616` }
121	};
122
123	namespace {
124	template <typename T>
125	struct Constants {
126	static const T sin_120;
127	static const T fft5_2;
128	static const T fft5_3;
129	static const T fft5_4;
130	static const T fft5_5;
131	};
132
133	template <typename T>
134	const T Constants<T>::sin_120 = (T)`0.86602540378443864676372317075294`;
135
136	template <typename T>
137	const T Constants<T>::fft5_2 = (T)`0.559016994374947424102293417182819`;
138
139	template <typename T>
140	const T Constants<T>::fft5_3 = (T)-`0.951056516295153572116439333379382`;
141
142	template <typename T>
143	const T Constants<T>::fft5_4 = (T)-`1.538841768587626701285145288018455`;
144
145	template <typename T>
146	const T Constants<T>::fft5_5 = (T)`0.363271264002680442947733378740309`;
147
148	} //namespace
149
150	#define BitRev(i,shift) \
151	((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \
152	((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \
153	((unsigned)bitrevTab[((i)>>16)&255] << 8)+ \
154	((unsigned)bitrevTab[((i)>>24)])) >> (shift)))
155
156	static int
157	DFTFactorize( int n, int* factors )
158	{
159	int nf = `0`, f, i, j;
160
161	if( n <= `5` )
162	{
163	factors[`0`] = n;
164	return `1`;
165	}
166
167	f = (((n - `1`)^n)+`1`) >> `1`;
168	if( f > `1` )
169	{
170	factors[nf++] = f;
171	n = f == n ? `1` : n/f;
172	}
173
174	for( f = `3`; n > `1`; )
175	{
176	int d = n/f;
177	if( d*f == n )
178	{
179	factors[nf++] = f;
180	n = d;
181	}
182	else
183	{
184	f += `2`;
185	if( f*f > n )
186	break;
187	}
188	}
189
190	if( n > `1` )
191	factors[nf++] = n;
192
193	f = (factors[`0`] & `1`) == `0`;
194	for( i = f; i < (nf+f)/`2`; i++ )
195	CV_SWAP( factors[i], factors[nf-i-`1`+f], j );
196
197	return nf;
198	}
199
200	static void
201	DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
202	{
203	int digits[`34`], radix[`34`];
204	int n = factors[`0`], m = `0`;
205	int* itab0 = itab;
206	int i, j, k;
207	Complex<double> w, w1;
208	double t;
209
210	if( n0 <= `5` )
211	{
212	itab[`0`] = `0`;
213	itab[n0-`1`] = n0-`1`;
214
215	if( n0 != `4` )
216	{
217	for( i = `1`; i < n0-`1`; i++ )
218	itab[i] = i;
219	}
220	else
221	{
222	itab[`1`] = `2`;
223	itab[`2`] = `1`;
224	}
225	if( n0 == `5` )
226	{
227	if( elem_size == sizeof(Complex<double>) )
228	((Complex<double>)_wave)[`0`] = Complex<double*>(`1.`,`0.`);
229	else
230	((Complex<float>)_wave)[`0`] = Complex<float*>(`1.f`,`0.f`);
231	}
232	if( n0 != `4` )
233	return;
234	m = `2`;
235	}
236	else
237	{
238	// radix[] is initialized from index 'nf' down to zero
239	CV_Assert (nf < `34`);
240	radix[nf] = `1`;
241	digits[nf] = `0`;
242	for( i = `0`; i < nf; i++ )
243	{
244	digits[i] = `0`;
245	radix[nf-i-`1`] = radix[nf-i]*factors[nf-i-`1`];
246	}
247
248	if( inv_itab && factors[`0`] != factors[nf-`1`] )
249	itab = (int*)_wave;
250
251	if( (n & `1`) == `0` )
252	{
253	int a = radix[`1`], na2 = n*a>>`1`, na4 = na2 >> `1`;
254	for( m = `0`; (unsigned)(`1` << m) < (unsigned)n; m++ )
255	;
256	if( n <= `2` )
257	{
258	itab[`0`] = `0`;
259	itab[`1`] = na2;
260	}
261	else if( n <= `256` )
262	{
263	int shift = `10` - m;
264	for( i = `0`; i <= n - `4`; i += `4` )
265	{
266	j = (bitrevTab[i>>`2`]>>shift)*a;
267	itab[i] = j;
268	itab[i+`1`] = j + na2;
269	itab[i+`2`] = j + na4;
270	itab[i+`3`] = j + na2 + na4;
271	}
272	}
273	else
274	{
275	int shift = `34` - m;
276	for( i = `0`; i < n; i += `4` )
277	{
278	int i4 = i >> `2`;
279	j = BitRev(i4,shift)*a;
280	itab[i] = j;
281	itab[i+`1`] = j + na2;
282	itab[i+`2`] = j + na4;
283	itab[i+`3`] = j + na2 + na4;
284	}
285	}
286
287	digits[`1`]++;
288
289	if( nf >= `2` )
290	{
291	for( i = n, j = radix[`2`]; i < n0; )
292	{
293	for( k = `0`; k < n; k++ )
294	itab[i+k] = itab[k] + j;
295	if( (i += n) >= n0 )
296	break;
297	j += radix[`2`];
298	for( k = `1`; ++digits[k] >= factors[k]; k++ )
299	{
300	digits[k] = `0`;
301	j += radix[k+`2`] - radix[k];
302	}
303	}
304	}
305	}
306	else
307	{
308	for( i = `0`, j = `0`;; )
309	{
310	itab[i] = j;
311	if( ++i >= n0 )
312	break;
313	j += radix[`1`];
314	for( k = `0`; ++digits[k] >= factors[k]; k++ )
315	{
316	digits[k] = `0`;
317	j += radix[k+`2`] - radix[k];
318	}
319	}
320	}
321
322	if( itab != itab0 )
323	{
324	itab0[`0`] = `0`;
325	for( i = n0 & `1`; i < n0; i += `2` )
326	{
327	int k0 = itab[i];
328	int k1 = itab[i+`1`];
329	itab0[k0] = i;
330	itab0[k1] = i+`1`;
331	}
332	}
333	}
334
335	if( (n0 & (n0-`1`)) == `0` )
336	{
337	w.re = w1.re = DFTTab[m][`0`];
338	w.im = w1.im = -DFTTab[m][`1`];
339	}
340	else
341	{
342	t = -CV_PI*`2`/n0;
343	w.im = w1.im = sin(x: t);
344	w.re = w1.re = std::sqrt(x: `1.` - w1.im*w1.im);
345	}
346	n = (n0+`1`)/`2`;
347
348	if( elem_size == sizeof(Complex<double>) )
349	{
350	Complex<double>* wave = (Complex<double>*)_wave;
351
352	wave[`0`].re = `1.`;
353	wave[`0`].im = `0.`;
354
355	if( (n0 & `1`) == `0` )
356	{
357	wave[n].re = -`1.`;
358	wave[n].im = `0`;
359	}
360
361	for( i = `1`; i < n; i++ )
362	{
363	wave[i] = w;
364	wave[n0-i].re = w.re;
365	wave[n0-i].im = -w.im;
366
367	t = w.rew1.re - w.imw1.im;
368	w.im = w.rew1.im + w.imw1.re;
369	w.re = t;
370	}
371	}
372	else
373	{
374	Complex<float>* wave = (Complex<float>*)_wave;
375	CV_Assert( elem_size == sizeof(Complex<float>) );
376
377	wave[`0`].re = `1.f`;
378	wave[`0`].im = `0.f`;
379
380	if( (n0 & `1`) == `0` )
381	{
382	wave[n].re = -`1.f`;
383	wave[n].im = `0.f`;
384	}
385
386	for( i = `1`; i < n; i++ )
387	{
388	wave[i].re = (float)w.re;
389	wave[i].im = (float)w.im;
390	wave[n0-i].re = (float)w.re;
391	wave[n0-i].im = (float)-w.im;
392
393	t = w.rew1.re - w.imw1.im;
394	w.im = w.rew1.im + w.imw1.re;
395	w.re = t;
396	}
397	}
398	}
399
400	// Reference radix-2 implementation.
401	template<typename T> struct DFT_R2
402	{
403	void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
404	const int nx = n/`2`;
405	for(int i = `0` ; i < c_n; i += n)
406	{
407	Complex<T>* v = dst + i;
408	T r0 = v[`0`].re + v[nx].re;
409	T i0 = v[`0`].im + v[nx].im;
410	T r1 = v[`0`].re - v[nx].re;
411	T i1 = v[`0`].im - v[nx].im;
412	v[`0`].re = r0; v[`0`].im = i0;
413	v[nx].re = r1; v[nx].im = i1;
414
415	for( int j = `1`, dw = dw0; j < nx; j++, dw += dw0 )
416	{
417	v = dst + i + j;
418	r1 = v[nx].rewave[dw].re - v[nx].imwave[dw].im;
419	i1 = v[nx].imwave[dw].re + v[nx].rewave[dw].im;
420	r0 = v[`0`].re; i0 = v[`0`].im;
421
422	v[`0`].re = r0 + r1; v[`0`].im = i0 + i1;
423	v[nx].re = r0 - r1; v[nx].im = i0 - i1;
424	}
425	}
426	}
427	};
428
429	// Reference radix-3 implementation.
430	template<typename T> struct DFT_R3
431	{
432	void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
433	const int nx = n / `3`;
434	for(int i = `0`; i < c_n; i += n )
435	{
436	{
437	Complex<T>* v = dst + i;
438	T r1 = v[nx].re + v[nx*`2`].re;
439	T i1 = v[nx].im + v[nx*`2`].im;
440	T r0 = v[`0`].re;
441	T i0 = v[`0`].im;
442	T r2 = Constants<T>::sin_120(v[nx].im - v[nx`2`].im);
443	T i2 = Constants<T>::sin_120(v[nx`2`].re - v[nx].re);
444	v[`0`].re = r0 + r1; v[`0`].im = i0 + i1;
445	r0 -= (T)`0.5`r1; i0 -= (T)`0.5`i1;
446	v[nx].re = r0 + r2; v[nx].im = i0 + i2;
447	v[nx`2`].re = r0 - r2; v[nx`2`].im = i0 - i2;
448	}
449
450	for(int j = `1`, dw = dw0; j < nx; j++, dw += dw0 )
451	{
452	Complex<T>* v = dst + i + j;
453	T r0 = v[nx].rewave[dw].re - v[nx].imwave[dw].im;
454	T i0 = v[nx].rewave[dw].im + v[nx].imwave[dw].re;
455	T i2 = v[nx`2`].rewave[dw`2`].re - v[nx`2`].imwave[dw`2`].im;
456	T r2 = v[nx`2`].rewave[dw`2`].im + v[nx`2`].imwave[dw`2`].re;
457	T r1 = r0 + i2; T i1 = i0 + r2;
458
459	r2 = Constants<T>::sin_120(i0 - r2); i2 = Constants<T>::sin_120(i2 - r0);
460	r0 = v[`0`].re; i0 = v[`0`].im;
461	v[`0`].re = r0 + r1; v[`0`].im = i0 + i1;
462	r0 -= (T)`0.5`r1; i0 -= (T)`0.5`i1;
463	v[nx].re = r0 + r2; v[nx].im = i0 + i2;
464	v[nx`2`].re = r0 - r2; v[nx`2`].im = i0 - i2;
465	}
466	}
467	}
468	};
469
470	// Reference radix-5 implementation.
471	template<typename T> struct DFT_R5
472	{
473	void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
474	const int nx = n / `5`;
475	for(int i = `0`; i < c_n; i += n )
476	{
477	for(int j = `0`, dw = `0`; j < nx; j++, dw += dw0 )
478	{
479	Complex<T>* v0 = dst + i + j;
480	Complex<T>* v1 = v0 + nx*`2`;
481	Complex<T>* v2 = v1 + nx*`2`;
482
483	T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
484
485	r3 = v0[nx].rewave[dw].re - v0[nx].imwave[dw].im;
486	i3 = v0[nx].rewave[dw].im + v0[nx].imwave[dw].re;
487	r2 = v2[`0`].rewave[dw`4`].re - v2[`0`].imwave[dw`4`].im;
488	i2 = v2[`0`].rewave[dw`4`].im + v2[`0`].imwave[dw`4`].re;
489
490	r1 = r3 + r2; i1 = i3 + i2;
491	r3 -= r2; i3 -= i2;
492
493	r4 = v1[nx].rewave[dw`3`].re - v1[nx].imwave[dw`3`].im;
494	i4 = v1[nx].rewave[dw`3`].im + v1[nx].imwave[dw`3`].re;
495	r0 = v1[`0`].rewave[dw`2`].re - v1[`0`].imwave[dw`2`].im;
496	i0 = v1[`0`].rewave[dw`2`].im + v1[`0`].imwave[dw`2`].re;
497
498	r2 = r4 + r0; i2 = i4 + i0;
499	r4 -= r0; i4 -= i0;
500
501	r0 = v0[`0`].re; i0 = v0[`0`].im;
502	r5 = r1 + r2; i5 = i1 + i2;
503
504	v0[`0`].re = r0 + r5; v0[`0`].im = i0 + i5;
505
506	r0 -= (T)`0.25`r5; i0 -= (T)`0.25`i5;
507	r1 = Constants<T>::fft5_2(r1 - r2); i1 = Constants<T>::fft5_2(i1 - i2);
508	r2 = -Constants<T>::fft5_3(i3 + i4); i2 = Constants<T>::fft5_3(r3 + r4);
509
510	i3 = -Constants<T>::fft5_5; r3 = Constants<T>::fft5_5;
511	i4 = -Constants<T>::fft5_4; r4 = Constants<T>::fft5_4;
512
513	r5 = r2 + i3; i5 = i2 + r3;
514	r2 -= i4; i2 -= r4;
515
516	r3 = r0 + r1; i3 = i0 + i1;
517	r0 -= r1; i0 -= i1;
518
519	v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
520	v2[`0`].re = r3 - r2; v2[`0`].im = i3 - i2;
521
522	v1[`0`].re = r0 + r5; v1[`0`].im = i0 + i5;
523	v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
524	}
525	}
526	}
527	};
528
529	template<typename T> struct DFT_VecR2
530	{
531	void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
532	DFT_R2<T>()(dst, c_n, n, dw0, wave);
533	}
534	};
535
536	template<typename T> struct DFT_VecR3
537	{
538	void operator()(Complex<T>* dst, const int c_n, const int n, const int dw0, const Complex<T>* wave) const {
539	DFT_R3<T>()(dst, c_n, n, dw0, wave);
540	}
541	};
542
543	template<typename T> struct DFT_VecR4
544	{
545	int operator()(Complex<T>, int, int, int&, const* Complex<T>) const* { return `1`; }
546	};
547
548	#if CV_SSE3
549
550	// multiplies a and b:
551	// r_re + ir_im = (a_re + ia_im)(b_re + ib_im)
552	// r_re and r_im are placed respectively in bits 31:0 and 63:32 of the resulting
553	// vector register.
554	inline __m128 complexMul(const Complex<float>* const a, const Complex<float>* const b) {
555	const __m128 z = _mm_setzero_ps();
556	const __m128 neg_elem0 = _mm_set_ps(z: `0.0f`,y: `0.0f`,x: `0.0f`,w: -`0.0f`);
557	// v_a[31:0] is a->re and v_a[63:32] is a->im.
558	const __m128 v_a = _mm_loadl_pi(a: z, p: (const __m64*)a);
559	const __m128 v_b = _mm_loadl_pi(a: z, p: (const __m64*)b);
560	// x_1 = v[nx] wave[dw].*
561	const __m128 v_a_riri = _mm_shuffle_ps(v_a, v_a, _MM_SHUFFLE(`0`, `1`, `0`, `1`));
562	const __m128 v_b_irri = _mm_shuffle_ps(v_b, v_b, _MM_SHUFFLE(`1`, `0`, `0`, `1`));
563	const __m128 mul = _mm_mul_ps(a: v_a_riri, b: v_b_irri);
564	const __m128 xored = _mm_xor_ps(a: mul, b: neg_elem0);
565	return _mm_hadd_ps(a: xored, b: z);
566	}
567
568	// optimized radix-2 transform
569	template<> struct DFT_VecR2<float> {
570	void operator()(Complex<float>* dst, const int c_n, const int n, const int dw0, const Complex<float>* wave) const {
571	const __m128 z = _mm_setzero_ps();
572	const int nx = n/`2`;
573	for(int i = `0` ; i < c_n; i += n)
574	{
575	{
576	Complex<float>* v = dst + i;
577	float r0 = v[`0`].re + v[nx].re;
578	float i0 = v[`0`].im + v[nx].im;
579	float r1 = v[`0`].re - v[nx].re;
580	float i1 = v[`0`].im - v[nx].im;
581	v[`0`].re = r0; v[`0`].im = i0;
582	v[nx].re = r1; v[nx].im = i1;
583	}
584
585	for( int j = `1`, dw = dw0; j < nx; j++, dw += dw0 )
586	{
587	Complex<float>* v = dst + i + j;
588	const __m128 x_1 = complexMul(a: &v[nx], b: &wave[dw]);
589	const __m128 v_0 = _mm_loadl_pi(a: z, p: (const __m64*)&v[`0`]);
590	_mm_storel_pi(p: (__m64*)&v[`0`], a: _mm_add_ps(a: v_0, b: x_1));
591	_mm_storel_pi(p: (__m64*)&v[nx], a: _mm_sub_ps(a: v_0, b: x_1));
592	}
593	}
594	}
595	};
596
597	// Optimized radix-3 implementation.
598	template<> struct DFT_VecR3<float> {
599	void operator()(Complex<float>* dst, const int c_n, const int n, const int dw0, const Complex<float>* wave) const {
600	const int nx = n / `3`;
601	const __m128 z = _mm_setzero_ps();
602	const __m128 neg_elem1 = _mm_set_ps(z: `0.0f`,y: `0.0f`,x: -`0.0f`,w: `0.0f`);
603	const __m128 sin_120 = _mm_set1_ps(w: Constants<float>::sin_120);
604	const __m128 one_half = _mm_set1_ps(w: `0.5f`);
605	for(int i = `0`; i < c_n; i += n )
606	{
607	{
608	Complex<float>* v = dst + i;
609
610	float r1 = v[nx].re + v[nx*`2`].re;
611	float i1 = v[nx].im + v[nx*`2`].im;
612	float r0 = v[`0`].re;
613	float i0 = v[`0`].im;
614	float r2 = Constants<float>::sin_120(v[nx].im - v[nx`2`].im);
615	float i2 = Constants<float>::sin_120(v[nx`2`].re - v[nx].re);
616	v[`0`].re = r0 + r1; v[`0`].im = i0 + i1;
617	r0 -= (float)`0.5`r1; i0 -= (float)`0.5`i1;
618	v[nx].re = r0 + r2; v[nx].im = i0 + i2;
619	v[nx`2`].re = r0 - r2; v[nx`2`].im = i0 - i2;
620	}
621
622	for(int j = `1`, dw = dw0; j < nx; j++, dw += dw0 )
623	{
624	Complex<float>* v = dst + i + j;
625	const __m128 x_0 = complexMul(a: &v[nx], b: &wave[dw]);
626	const __m128 x_2 = complexMul(a: &v[nx`2`], b: &wave[dw`2`]);
627	const __m128 x_1 = _mm_add_ps(a: x_0, b: x_2);
628
629	const __m128 v_0 = _mm_loadl_pi(a: z, p: (const __m64*)&v[`0`]);
630	_mm_storel_pi(p: (__m64*)&v[`0`], a: _mm_add_ps(a: v_0, b: x_1));
631
632	const __m128 x_3 = _mm_mul_ps(a: sin_120, b: _mm_xor_ps(a: _mm_sub_ps(a: x_2, b: x_0), b: neg_elem1));
633	const __m128 x_3s = _mm_shuffle_ps(x_3, x_3, _MM_SHUFFLE(`0`, `1`, `0`, `1`));
634	const __m128 x_4 = _mm_sub_ps(a: v_0, b: _mm_mul_ps(a: one_half, b: x_1));
635	_mm_storel_pi(p: (__m64*)&v[nx], a: _mm_add_ps(a: x_4, b: x_3s));
636	_mm_storel_pi(p: (__m64)&v[nx`2`], a: _mm_sub_ps(a: x_4, b: x_3s));
637	}
638	}
639	}
640	};
641
642	// optimized radix-4 transform
643	template<> struct DFT_VecR4<float>
644	{
645	int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const
646	{
647	int n = `1`, i, j, nx, dw, dw0 = _dw0;
648	__m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1;
649	Cv32suf t; t.i = `0x80000000`;
650	__m128 neg0_mask = _mm_load_ss(p: &t.f);
651	__m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(`0`,`1`,`2`,`3`));
652
653	for( ; n*`4` <= N; )
654	{
655	nx = n;
656	n *= `4`;
657	dw0 /= `4`;
658
659	for( i = `0`; i < n0; i += n )
660	{
661	Complexf v0, v1;
662
663	v0 = dst + i;
664	v1 = v0 + nx*`2`;
665
666	x02 = _mm_loadl_pi(a: x02, p: (const __m64*)&v0[`0`]);
667	x13 = _mm_loadl_pi(a: x13, p: (const __m64*)&v0[nx]);
668	x02 = _mm_loadh_pi(a: x02, p: (const __m64*)&v1[`0`]);
669	x13 = _mm_loadh_pi(a: x13, p: (const __m64*)&v1[nx]);
670
671	y01 = _mm_add_ps(a: x02, b: x13);
672	y23 = _mm_sub_ps(a: x02, b: x13);
673	t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(`2`,`3`,`3`,`2`)), b: neg3_mask);
674	t0 = _mm_movelh_ps(a: y01, b: y23);
675	y01 = _mm_add_ps(a: t0, b: t1);
676	y23 = _mm_sub_ps(a: t0, b: t1);
677
678	_mm_storel_pi(p: (__m64*)&v0[`0`], a: y01);
679	_mm_storeh_pi(p: (__m64*)&v0[nx], a: y01);
680	_mm_storel_pi(p: (__m64*)&v1[`0`], a: y23);
681	_mm_storeh_pi(p: (__m64*)&v1[nx], a: y23);
682
683	for( j = `1`, dw = dw0; j < nx; j++, dw += dw0 )
684	{
685	v0 = dst + i + j;
686	v1 = v0 + nx*`2`;
687
688	x13 = _mm_loadl_pi(a: x13, p: (const __m64*)&v0[nx]);
689	w23 = _mm_loadl_pi(a: w23, p: (const __m64)&wave[dw`2`]);
690	x13 = _mm_loadh_pi(a: x13, p: (const __m64)&v1[nx]); // x1, x3 = r1 i1 r3 i3*
691	w23 = _mm_loadh_pi(a: w23, p: (const __m64)&wave[dw`3`]); // w2, w3 = wr2 wi2 wr3 wi3
692
693	t0 = _mm_mul_ps(a: _mm_moveldup_ps(a: x13), b: w23);
694	t1 = _mm_mul_ps(a: _mm_movehdup_ps(a: x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(`2`,`3`,`0`,`1`)));
695	x13 = _mm_addsub_ps(a: t0, b: t1);
696	// re(x1w2), im(x1w2), re(x3w3), im(x3w3)
697	x02 = _mm_loadl_pi(a: x02, p: (const __m64)&v1[`0`]); // x2 = r2 i2*
698	w01 = _mm_loadl_pi(a: w01, p: (const __m64)&wave[dw]); // w1 = wr1 wi1*
699	x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(`0`,`0`,`1`,`1`));
700	w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(`1`,`0`,`0`,`1`));
701	x02 = _mm_mul_ps(a: x02, b: w01);
702	x02 = _mm_addsub_ps(a: x02, b: _mm_movelh_ps(a: x02, b: x02));
703	// re(x0) im(x0) re(x2w1), im(x2w1)
704	x02 = _mm_loadl_pi(a: x02, p: (const __m64*)&v0[`0`]);
705
706	y01 = _mm_add_ps(a: x02, b: x13);
707	y23 = _mm_sub_ps(a: x02, b: x13);
708	t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(`2`,`3`,`3`,`2`)), b: neg3_mask);
709	t0 = _mm_movelh_ps(a: y01, b: y23);
710	y01 = _mm_add_ps(a: t0, b: t1);
711	y23 = _mm_sub_ps(a: t0, b: t1);
712
713	_mm_storel_pi(p: (__m64*)&v0[`0`], a: y01);
714	_mm_storeh_pi(p: (__m64*)&v0[nx], a: y01);
715	_mm_storel_pi(p: (__m64*)&v1[`0`], a: y23);
716	_mm_storeh_pi(p: (__m64*)&v1[nx], a: y23);
717	}
718	}
719	}
720
721	_dw0 = dw0;
722	return n;
723	}
724	};
725
726	#endif
727
728	#ifdef USE_IPP_DFT
729	static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
730	const void* spec, uchar* buf)
731	{
732	return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_32fc, (const Ipp32fc)src, (Ipp32fc)dst,
733	(const IppsDFTSpec_C_32fc*)spec, buf);
734	}
735
736	static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst,
737	const void* spec, uchar* buf)
738	{
739	return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_64fc, (const Ipp64fc)src, (Ipp64fc)dst,
740	(const IppsDFTSpec_C_64fc*)spec, buf);
741	}
742
743	static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst,
744	const void* spec, uchar* buf)
745	{
746	return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_32fc, (const Ipp32fc)src, (Ipp32fc)dst,
747	(const IppsDFTSpec_C_32fc*)spec, buf);
748	}
749
750	static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst,
751	const void* spec, uchar* buf)
752	{
753	return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_64fc, (const Ipp64fc)src, (Ipp64fc)dst,
754	(const IppsDFTSpec_C_64fc*)spec, buf);
755	}
756
757	static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst,
758	const void* spec, uchar* buf)
759	{
760	return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
761	}
762
763	static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst,
764	const void* spec, uchar* buf)
765	{
766	return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
767	}
768
769	static IppStatus ippsDFTInv_PackToR( const float* src, float* dst,
770	const void* spec, uchar* buf)
771	{
772	return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
773	}
774
775	static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
776	const void* spec, uchar* buf)
777	{
778	return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
779	}
780	#endif
781
782	struct OcvDftOptions;
783
784	typedef void (DFTFunc)(const* OcvDftOptions & c, const void* src, void* dst);
785
786	struct OcvDftOptions {
787	int nf;
788	int *factors;
789	double scale;
790
791	int* itab;
792	void* wave;
793	int tab_size;
794	int n;
795
796	bool isInverse;
797	bool noPermute;
798	bool isComplex;
799
800	bool haveSSE3;
801
802	DFTFunc dft_func;
803	bool useIpp;
804
805	#ifdef USE_IPP_DFT
806	uchar* ipp_spec;
807	uchar* ipp_work;
808	#endif
809
810	OcvDftOptions()
811	{
812	nf = `0`;
813	factors = `0`;
814	scale = `0`;
815	itab = `0`;
816	wave = `0`;
817	tab_size = `0`;
818	n = `0`;
819	isInverse = false;
820	noPermute = false;
821	isComplex = false;
822	useIpp = false;
823	#ifdef USE_IPP_DFT
824	ipp_spec = `0`;
825	ipp_work = `0`;
826	#endif
827	dft_func = `0`;
828	haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
829	}
830	};
831
832	// mixed-radix complex discrete Fourier transform: double-precision version
833	template<typename T> static void
834	DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
835	{
836	const Complex<T>* wave = (Complex<T>*)c.wave;
837	const int * itab = c.itab;
838
839	int n = c.n;
840	int f_idx, nx;
841	int inv = c.isInverse;
842	int dw0 = c.tab_size, dw;
843	int i, j, k;
844	Complex<T> t;
845	T scale = (T)c.scale;
846
847	if(typeid(T) == typeid(float))
848	{
849	CALL_HAL(dft, cv_hal_dft, reinterpret_cast<const uchar>(src), reinterpret_cast<uchar>(dst), CV_32F,
850	c.nf, c.factors, c.scale, c.itab, c.wave, c.tab_size, c.n, c.isInverse, c.noPermute);
851	}
852	if(typeid(T) == typeid(double))
853	{
854	CALL_HAL(dft, cv_hal_dft, reinterpret_cast<const uchar>(src), reinterpret_cast<uchar>(dst), CV_64F,
855	c.nf, c.factors, c.scale, c.itab, c.wave, c.tab_size, c.n, c.isInverse, c.noPermute);
856	}
857
858	if( c.useIpp )
859	{
860	#ifdef USE_IPP_DFT
861	if( !inv )
862	{
863	if (ippsDFTFwd_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= `0`)
864	{
865	CV_IMPL_ADD(CV_IMPL_IPP);
866	return;
867	}
868	}
869	else
870	{
871	if (ippsDFTInv_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= `0`)
872	{
873	CV_IMPL_ADD(CV_IMPL_IPP);
874	return;
875	}
876	}
877	setIppErrorStatus();
878	#endif
879	}
880
881	int tab_step = c.tab_size == n ? `1` : c.tab_size == n*`2` ? `2` : c.tab_size/n;
882
883	// 0. shuffle data
884	if( dst != src )
885	{
886	CV_Assert( !c.noPermute );
887	if( !inv )
888	{
889	for( i = `0`; i <= n - `2`; i += `2`, itab += `2`*tab_step )
890	{
891	int k0 = itab[`0`], k1 = itab[tab_step];
892	CV_Assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
893	dst[i] = src[k0]; dst[i+`1`] = src[k1];
894	}
895
896	if( i < n )
897	dst[n-`1`] = src[n-`1`];
898	}
899	else
900	{
901	for( i = `0`; i <= n - `2`; i += `2`, itab += `2`*tab_step )
902	{
903	int k0 = itab[`0`], k1 = itab[tab_step];
904	CV_Assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
905	t.re = src[k0].re; t.im = -src[k0].im;
906	dst[i] = t;
907	t.re = src[k1].re; t.im = -src[k1].im;
908	dst[i+`1`] = t;
909	}
910
911	if( i < n )
912	{
913	t.re = src[n-`1`].re; t.im = -src[n-`1`].im;
914	dst[i] = t;
915	}
916	}
917	}
918	else
919	{
920	if( !c.noPermute )
921	{
922	CV_Assert( c.factors[`0`] == c.factors[c.nf-`1`] );
923	if( c.nf == `1` )
924	{
925	if( (n & `3`) == `0` )
926	{
927	int n2 = n/`2`;
928	Complex<T>* dsth = dst + n2;
929
930	for( i = `0`; i < n2; i += `2`, itab += tab_step*`2` )
931	{
932	j = itab[`0`];
933	CV_Assert( (unsigned)j < (unsigned)n2 );
934
935	CV_SWAP(dst[i+`1`], dsth[j], t);
936	if( j > i )
937	{
938	CV_SWAP(dst[i], dst[j], t);
939	CV_SWAP(dsth[i+`1`], dsth[j+`1`], t);
940	}
941	}
942	}
943	// else do nothing
944	}
945	else
946	{
947	for( i = `0`; i < n; i++, itab += tab_step )
948	{
949	j = itab[`0`];
950	CV_Assert( (unsigned)j < (unsigned)n );
951	if( j > i )
952	CV_SWAP(dst[i], dst[j], t);
953	}
954	}
955	}
956
957	if( inv )
958	{
959	for( i = `0`; i <= n - `2`; i += `2` )
960	{
961	T t0 = -dst[i].im;
962	T t1 = -dst[i+`1`].im;
963	dst[i].im = t0; dst[i+`1`].im = t1;
964	}
965
966	if( i < n )
967	dst[n-`1`].im = -dst[n-`1`].im;
968	}
969	}
970
971	n = `1`;
972	// 1. power-2 transforms
973	if( (c.factors[`0`] & `1`) == `0` )
974	{
975	if( c.factors[`0`] >= `4` && c.haveSSE3)
976	{
977	DFT_VecR4<T> vr4;
978	n = vr4(dst, c.factors[`0`], c.n, dw0, wave);
979	}
980
981	// radix-4 transform
982	for( ; n*`4` <= c.factors[`0`]; )
983	{
984	nx = n;
985	n *= `4`;
986	dw0 /= `4`;
987
988	for( i = `0`; i < c.n; i += n )
989	{
990	Complex<T> v0, v1;
991	T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
992
993	v0 = dst + i;
994	v1 = v0 + nx*`2`;
995
996	r0 = v1[`0`].re; i0 = v1[`0`].im;
997	r4 = v1[nx].re; i4 = v1[nx].im;
998
999	r1 = r0 + r4; i1 = i0 + i4;
1000	r3 = i0 - i4; i3 = r4 - r0;
1001
1002	r2 = v0[`0`].re; i2 = v0[`0`].im;
1003	r4 = v0[nx].re; i4 = v0[nx].im;
1004
1005	r0 = r2 + r4; i0 = i2 + i4;
1006	r2 -= r4; i2 -= i4;
1007
1008	v0[`0`].re = r0 + r1; v0[`0`].im = i0 + i1;
1009	v1[`0`].re = r0 - r1; v1[`0`].im = i0 - i1;
1010	v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
1011	v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
1012
1013	for( j = `1`, dw = dw0; j < nx; j++, dw += dw0 )
1014	{
1015	v0 = dst + i + j;
1016	v1 = v0 + nx*`2`;
1017
1018	r2 = v0[nx].rewave[dw`2`].re - v0[nx].imwave[dw`2`].im;
1019	i2 = v0[nx].rewave[dw`2`].im + v0[nx].imwave[dw`2`].re;
1020	r0 = v1[`0`].rewave[dw].im + v1[`0`].imwave[dw].re;
1021	i0 = v1[`0`].rewave[dw].re - v1[`0`].imwave[dw].im;
1022	r3 = v1[nx].rewave[dw`3`].im + v1[nx].imwave[dw`3`].re;
1023	i3 = v1[nx].rewave[dw`3`].re - v1[nx].imwave[dw`3`].im;
1024
1025	r1 = i0 + i3; i1 = r0 + r3;
1026	r3 = r0 - r3; i3 = i3 - i0;
1027	r4 = v0[`0`].re; i4 = v0[`0`].im;
1028
1029	r0 = r4 + r2; i0 = i4 + i2;
1030	r2 = r4 - r2; i2 = i4 - i2;
1031
1032	v0[`0`].re = r0 + r1; v0[`0`].im = i0 + i1;
1033	v1[`0`].re = r0 - r1; v1[`0`].im = i0 - i1;
1034	v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
1035	v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
1036	}
1037	}
1038	}
1039
1040	for( ; n < c.factors[`0`]; )
1041	{
1042	// do the remaining radix-2 transform
1043	n *= `2`;
1044	dw0 /= `2`;
1045
1046	if(c.haveSSE3)
1047	{
1048	DFT_VecR2<T> vr2;
1049	vr2(dst, c.n, n, dw0, wave);
1050	}
1051	else
1052	{
1053	DFT_R2<T> vr2;
1054	vr2(dst, c.n, n, dw0, wave);
1055	}
1056	}
1057	}
1058
1059	// 2. all the other transforms
1060	for( f_idx = (c.factors[`0`]&`1`) ? `0` : `1`; f_idx < c.nf; f_idx++ )
1061	{
1062	int factor = c.factors[f_idx];
1063	nx = n;
1064	n *= factor;
1065	dw0 /= factor;
1066
1067	if( factor == `3` )
1068	{
1069	if(c.haveSSE3)
1070	{
1071	DFT_VecR3<T> vr3;
1072	vr3(dst, c.n, n, dw0, wave);
1073	}
1074	else
1075	{
1076	DFT_R3<T> vr3;
1077	vr3(dst, c.n, n, dw0, wave);
1078	}
1079	}
1080	else if( factor == `5` )
1081	{
1082	DFT_R5<T> vr5;
1083	vr5(dst, c.n, n, dw0, wave);
1084	}
1085	else
1086	{
1087	// radix-"factor" - an odd number
1088	int p, q, factor2 = (factor - `1`)/`2`;
1089	int d, dd, dw_f = c.tab_size/factor;
1090	AutoBuffer<Complex<T> > buf(factor2 * `2`);
1091	Complex<T>* a = buf.data();
1092	Complex<T>* b = a + factor2;
1093
1094	for( i = `0`; i < c.n; i += n )
1095	{
1096	for( j = `0`, dw = `0`; j < nx; j++, dw += dw0 )
1097	{
1098	Complex<T>* v = dst + i + j;
1099	Complex<T> v_0 = v[`0`];
1100	Complex<T> vn_0 = v_0;
1101
1102	if( j == `0` )
1103	{
1104	for( p = `1`, k = nx; p <= factor2; p++, k += nx )
1105	{
1106	T r0 = v[k].re + v[n-k].re;
1107	T i0 = v[k].im - v[n-k].im;
1108	T r1 = v[k].re - v[n-k].re;
1109	T i1 = v[k].im + v[n-k].im;
1110
1111	vn_0.re += r0; vn_0.im += i1;
1112	a[p-`1`].re = r0; a[p-`1`].im = i0;
1113	b[p-`1`].re = r1; b[p-`1`].im = i1;
1114	}
1115	}
1116	else
1117	{
1118	const Complex<T>* wave_ = wave + dw*factor;
1119	d = dw;
1120
1121	for( p = `1`, k = nx; p <= factor2; p++, k += nx, d += dw )
1122	{
1123	T r2 = v[k].rewave[d].re - v[k].imwave[d].im;
1124	T i2 = v[k].rewave[d].im + v[k].imwave[d].re;
1125
1126	T r1 = v[n-k].rewave_[-d].re - v[n-k].imwave_[-d].im;
1127	T i1 = v[n-k].rewave_[-d].im + v[n-k].imwave_[-d].re;
1128
1129	T r0 = r2 + r1;
1130	T i0 = i2 - i1;
1131	r1 = r2 - r1;
1132	i1 = i2 + i1;
1133
1134	vn_0.re += r0; vn_0.im += i1;
1135	a[p-`1`].re = r0; a[p-`1`].im = i0;
1136	b[p-`1`].re = r1; b[p-`1`].im = i1;
1137	}
1138	}
1139
1140	v[`0`] = vn_0;
1141
1142	for( p = `1`, k = nx; p <= factor2; p++, k += nx )
1143	{
1144	Complex<T> s0 = v_0, s1 = v_0;
1145	d = dd = dw_f*p;
1146
1147	for( q = `0`; q < factor2; q++ )
1148	{
1149	T r0 = wave[d].re * a[q].re;
1150	T i0 = wave[d].im * a[q].im;
1151	T r1 = wave[d].re * b[q].im;
1152	T i1 = wave[d].im * b[q].re;
1153
1154	s1.re += r0 + i0; s0.re += r0 - i0;
1155	s1.im += r1 - i1; s0.im += r1 + i1;
1156
1157	d += dd;
1158	d -= -(d >= c.tab_size) & c.tab_size;
1159	}
1160
1161	v[k] = s0;
1162	v[n-k] = s1;
1163	}
1164	}
1165	}
1166	}
1167	}
1168
1169	if( scale != `1` )
1170	{
1171	T re_scale = scale, im_scale = scale;
1172	if( inv )
1173	im_scale = -im_scale;
1174
1175	for( i = `0`; i < c.n; i++ )
1176	{
1177	T t0 = dst[i].re*re_scale;
1178	T t1 = dst[i].im*im_scale;
1179	dst[i].re = t0;
1180	dst[i].im = t1;
1181	}
1182	}
1183	else if( inv )
1184	{
1185	for( i = `0`; i <= c.n - `2`; i += `2` )
1186	{
1187	T t0 = -dst[i].im;
1188	T t1 = -dst[i+`1`].im;
1189	dst[i].im = t0;
1190	dst[i+`1`].im = t1;
1191	}
1192
1193	if( i < c.n )
1194	dst[c.n-`1`].im = -dst[c.n-`1`].im;
1195	}
1196	}
1197
1198
1199	/ FFT of real vector*
1200	output vector format:
1201	re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
1202	re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] /*
1203	template<typename T> static void
1204	RealDFT(const OcvDftOptions & c, const T* src, T* dst)
1205	{
1206	int n = c.n;
1207	int complex_output = c.isComplex;
1208	T scale = (T)c.scale;
1209	int j;
1210	dst += complex_output;
1211
1212	if( c.useIpp )
1213	{
1214	#ifdef USE_IPP_DFT
1215	if (ippsDFTFwd_RToPack( src, dst, c.ipp_spec, c.ipp_work ) >=`0`)
1216	{
1217	if( complex_output )
1218	{
1219	dst[-`1`] = dst[`0`];
1220	dst[`0`] = `0`;
1221	if( (n & `1`) == `0` )
1222	dst[n] = `0`;
1223	}
1224	CV_IMPL_ADD(CV_IMPL_IPP);
1225	return;
1226	}
1227	setIppErrorStatus();
1228	#endif
1229	}
1230	CV_Assert( c.tab_size == n );
1231
1232	if( n == `1` )
1233	{
1234	dst[`0`] = src[`0`]*scale;
1235	}
1236	else if( n == `2` )
1237	{
1238	T t = (src[`0`] + src[`1`])*scale;
1239	dst[`1`] = (src[`0`] - src[`1`])*scale;
1240	dst[`0`] = t;
1241	}
1242	else if( n & `1` )
1243	{
1244	dst -= complex_output;
1245	Complex<T>* _dst = (Complex<T>*)dst;
1246	_dst[`0`].re = src[`0`]*scale;
1247	_dst[`0`].im = `0`;
1248	for( j = `1`; j < n; j += `2` )
1249	{
1250	T t0 = src[c.itab[j]]*scale;
1251	T t1 = src[c.itab[j+`1`]]*scale;
1252	_dst[j].re = t0;
1253	_dst[j].im = `0`;
1254	_dst[j+`1`].re = t1;
1255	_dst[j+`1`].im = `0`;
1256	}
1257	OcvDftOptions sub_c = c;
1258	sub_c.isComplex = false;
1259	sub_c.isInverse = false;
1260	sub_c.noPermute = true;
1261	sub_c.scale = `1.`;
1262	DFT(sub_c, _dst, _dst);
1263	if( !complex_output )
1264	dst[`1`] = dst[`0`];
1265	}
1266	else
1267	{
1268	T t0, t;
1269	T h1_re, h1_im, h2_re, h2_im;
1270	T scale2 = scale*(T)`0.5`;
1271	int n2 = n >> `1`;
1272
1273	c.factors[`0`] >>= `1`;
1274
1275	OcvDftOptions sub_c = c;
1276	sub_c.factors += (c.factors[`0`] == `1`);
1277	sub_c.nf -= (c.factors[`0`] == `1`);
1278	sub_c.isComplex = false;
1279	sub_c.isInverse = false;
1280	sub_c.noPermute = false;
1281	sub_c.scale = `1.`;
1282	sub_c.n = n2;
1283
1284	DFT(sub_c, (Complex<T>)src, (Complex<T>)dst);
1285
1286	c.factors[`0`] <<= `1`;
1287
1288	t = dst[`0`] - dst[`1`];
1289	dst[`0`] = (dst[`0`] + dst[`1`])*scale;
1290	dst[`1`] = t*scale;
1291
1292	t0 = dst[n2];
1293	t = dst[n-`1`];
1294	dst[n-`1`] = dst[`1`];
1295
1296	const Complex<T> wave = (const* Complex<T>*)c.wave;
1297
1298	for( j = `2`, wave++; j < n2; j += `2`, wave++ )
1299	{
1300	/ calc odd /
1301	h2_re = scale2*(dst[j+`1`] + t);
1302	h2_im = scale2*(dst[n-j] - dst[j]);
1303
1304	/ calc even /
1305	h1_re = scale2*(dst[j] + dst[n-j]);
1306	h1_im = scale2*(dst[j+`1`] - t);
1307
1308	/ rotate /
1309	t = h2_rewave->re - h2_imwave->im;
1310	h2_im = h2_rewave->im + h2_imwave->re;
1311	h2_re = t;
1312	t = dst[n-j-`1`];
1313
1314	dst[j-`1`] = h1_re + h2_re;
1315	dst[n-j-`1`] = h1_re - h2_re;
1316	dst[j] = h1_im + h2_im;
1317	dst[n-j] = h2_im - h1_im;
1318	}
1319
1320	if( j <= n2 )
1321	{
1322	dst[n2-`1`] = t0*scale;
1323	dst[n2] = -t*scale;
1324	}
1325	}
1326
1327	if( complex_output && ((n & `1`) == `0` \|\| n == `1`))
1328	{
1329	dst[-`1`] = dst[`0`];
1330	dst[`0`] = `0`;
1331	if( n > `1` )
1332	dst[n] = `0`;
1333	}
1334	}
1335
1336	/ Inverse FFT of complex conjugate-symmetric vector*
1337	input vector format:
1338	re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
1339	re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] /*
1340	template<typename T> static void
1341	CCSIDFT(const OcvDftOptions & c, const T* src, T* dst)
1342	{
1343	int n = c.n;
1344	int complex_input = c.isComplex;
1345	int j, k;
1346	T scale = (T)c.scale;
1347	T save_s1 = `0.`;
1348	T t0, t1, t2, t3, t;
1349
1350	CV_Assert( c.tab_size == n );
1351
1352	if( complex_input )
1353	{
1354	CV_Assert( src != dst );
1355	save_s1 = src[`1`];
1356	((T*)src)[`1`] = src[`0`];
1357	src++;
1358	}
1359	if( c.useIpp )
1360	{
1361	#ifdef USE_IPP_DFT
1362	if (ippsDFTInv_PackToR( src, dst, c.ipp_spec, c.ipp_work ) >=`0`)
1363	{
1364	if( complex_input )
1365	((T*)src)[`0`] = (T)save_s1;
1366	CV_IMPL_ADD(CV_IMPL_IPP);
1367	return;
1368	}
1369
1370	setIppErrorStatus();
1371	#endif
1372	}
1373	if( n == `1` )
1374	{
1375	dst[`0`] = (T)(src[`0`]*scale);
1376	}
1377	else if( n == `2` )
1378	{
1379	t = (src[`0`] + src[`1`])*scale;
1380	dst[`1`] = (src[`0`] - src[`1`])*scale;
1381	dst[`0`] = t;
1382	}
1383	else if( n & `1` )
1384	{
1385	Complex<T>* _src = (Complex<T>*)(src-`1`);
1386	Complex<T>* _dst = (Complex<T>*)dst;
1387
1388	_dst[`0`].re = src[`0`];
1389	_dst[`0`].im = `0`;
1390
1391	int n2 = (n+`1`) >> `1`;
1392
1393	for( j = `1`; j < n2; j++ )
1394	{
1395	int k0 = c.itab[j], k1 = c.itab[n-j];
1396	t0 = _src[j].re; t1 = _src[j].im;
1397	_dst[k0].re = t0; _dst[k0].im = -t1;
1398	_dst[k1].re = t0; _dst[k1].im = t1;
1399	}
1400
1401	OcvDftOptions sub_c = c;
1402	sub_c.isComplex = false;
1403	sub_c.isInverse = false;
1404	sub_c.noPermute = true;
1405	sub_c.scale = `1.`;
1406	sub_c.n = n;
1407
1408	DFT(sub_c, _dst, _dst);
1409	dst[`0`] *= scale;
1410	for( j = `1`; j < n; j += `2` )
1411	{
1412	t0 = dst[j`2`]scale;
1413	t1 = dst[j`2`+`2`]scale;
1414	dst[j] = t0;
1415	dst[j+`1`] = t1;
1416	}
1417	}
1418	else
1419	{
1420	int inplace = src == dst;
1421	const Complex<T>* w = (const Complex<T>*)c.wave;
1422
1423	t = src[`1`];
1424	t0 = (src[`0`] + src[n-`1`]);
1425	t1 = (src[n-`1`] - src[`0`]);
1426	dst[`0`] = t0;
1427	dst[`1`] = t1;
1428
1429	int n2 = (n+`1`) >> `1`;
1430
1431	for( j = `2`, w++; j < n2; j += `2`, w++ )
1432	{
1433	T h1_re, h1_im, h2_re, h2_im;
1434
1435	h1_re = (t + src[n-j-`1`]);
1436	h1_im = (src[j] - src[n-j]);
1437
1438	h2_re = (t - src[n-j-`1`]);
1439	h2_im = (src[j] + src[n-j]);
1440
1441	t = h2_rew->re + h2_imw->im;
1442	h2_im = h2_imw->re - h2_rew->im;
1443	h2_re = t;
1444
1445	t = src[j+`1`];
1446	t0 = h1_re - h2_im;
1447	t1 = -h1_im - h2_re;
1448	t2 = h1_re + h2_im;
1449	t3 = h1_im - h2_re;
1450
1451	if( inplace )
1452	{
1453	dst[j] = t0;
1454	dst[j+`1`] = t1;
1455	dst[n-j] = t2;
1456	dst[n-j+`1`]= t3;
1457	}
1458	else
1459	{
1460	int j2 = j >> `1`;
1461	k = c.itab[j2];
1462	dst[k] = t0;
1463	dst[k+`1`] = t1;
1464	k = c.itab[n2-j2];
1465	dst[k] = t2;
1466	dst[k+`1`]= t3;
1467	}
1468	}
1469
1470	if( j <= n2 )
1471	{
1472	t0 = t*`2`;
1473	t1 = src[n2]*`2`;
1474
1475	if( inplace )
1476	{
1477	dst[n2] = t0;
1478	dst[n2+`1`] = t1;
1479	}
1480	else
1481	{
1482	k = c.itab[n2];
1483	dst[k*`2`] = t0;
1484	dst[k*`2`+`1`] = t1;
1485	}
1486	}
1487
1488	c.factors[`0`] >>= `1`;
1489
1490	OcvDftOptions sub_c = c;
1491	sub_c.factors += (c.factors[`0`] == `1`);
1492	sub_c.nf -= (c.factors[`0`] == `1`);
1493	sub_c.isComplex = false;
1494	sub_c.isInverse = false;
1495	sub_c.noPermute = !inplace;
1496	sub_c.scale = `1.`;
1497	sub_c.n = n2;
1498
1499	DFT(sub_c, (Complex<T>)dst, (Complex<T>)dst);
1500
1501	c.factors[`0`] <<= `1`;
1502
1503	for( j = `0`; j < n; j += `2` )
1504	{
1505	t0 = dst[j]*scale;
1506	t1 = dst[j+`1`]*(-scale);
1507	dst[j] = t0;
1508	dst[j+`1`] = t1;
1509	}
1510	}
1511	if( complex_input )
1512	((T*)src)[`0`] = (T)save_s1;
1513	}
1514
1515	static void
1516	CopyColumn( const uchar* _src, size_t src_step,
1517	uchar* _dst, size_t dst_step,
1518	int len, size_t elem_size )
1519	{
1520	int i, t0, t1;
1521	const int* src = (const int*)_src;
1522	int* dst = (int*)_dst;
1523	src_step /= sizeof(src[`0`]);
1524	dst_step /= sizeof(dst[`0`]);
1525
1526	if( elem_size == sizeof(int) )
1527	{
1528	for( i = `0`; i < len; i++, src += src_step, dst += dst_step )
1529	dst[`0`] = src[`0`];
1530	}
1531	else if( elem_size == sizeof(int)*`2` )
1532	{
1533	for( i = `0`; i < len; i++, src += src_step, dst += dst_step )
1534	{
1535	t0 = src[`0`]; t1 = src[`1`];
1536	dst[`0`] = t0; dst[`1`] = t1;
1537	}
1538	}
1539	else if( elem_size == sizeof(int)*`4` )
1540	{
1541	for( i = `0`; i < len; i++, src += src_step, dst += dst_step )
1542	{
1543	t0 = src[`0`]; t1 = src[`1`];
1544	dst[`0`] = t0; dst[`1`] = t1;
1545	t0 = src[`2`]; t1 = src[`3`];
1546	dst[`2`] = t0; dst[`3`] = t1;
1547	}
1548	}
1549	}
1550
1551
1552	static void
1553	CopyFrom2Columns( const uchar* _src, size_t src_step,
1554	uchar* _dst0, uchar* _dst1,
1555	int len, size_t elem_size )
1556	{
1557	int i, t0, t1;
1558	const int* src = (const int*)_src;
1559	int* dst0 = (int*)_dst0;
1560	int* dst1 = (int*)_dst1;
1561	src_step /= sizeof(src[`0`]);
1562
1563	if( elem_size == sizeof(int) )
1564	{
1565	for( i = `0`; i < len; i++, src += src_step )
1566	{
1567	t0 = src[`0`]; t1 = src[`1`];
1568	dst0[i] = t0; dst1[i] = t1;
1569	}
1570	}
1571	else if( elem_size == sizeof(int)*`2` )
1572	{
1573	for( i = `0`; i < len*`2`; i += `2`, src += src_step )
1574	{
1575	t0 = src[`0`]; t1 = src[`1`];
1576	dst0[i] = t0; dst0[i+`1`] = t1;
1577	t0 = src[`2`]; t1 = src[`3`];
1578	dst1[i] = t0; dst1[i+`1`] = t1;
1579	}
1580	}
1581	else if( elem_size == sizeof(int)*`4` )
1582	{
1583	for( i = `0`; i < len*`4`; i += `4`, src += src_step )
1584	{
1585	t0 = src[`0`]; t1 = src[`1`];
1586	dst0[i] = t0; dst0[i+`1`] = t1;
1587	t0 = src[`2`]; t1 = src[`3`];
1588	dst0[i+`2`] = t0; dst0[i+`3`] = t1;
1589	t0 = src[`4`]; t1 = src[`5`];
1590	dst1[i] = t0; dst1[i+`1`] = t1;
1591	t0 = src[`6`]; t1 = src[`7`];
1592	dst1[i+`2`] = t0; dst1[i+`3`] = t1;
1593	}
1594	}
1595	}
1596
1597
1598	static void
1599	CopyTo2Columns( const uchar* _src0, const uchar* _src1,
1600	uchar* _dst, size_t dst_step,
1601	int len, size_t elem_size )
1602	{
1603	int i, t0, t1;
1604	const int* src0 = (const int*)_src0;
1605	const int* src1 = (const int*)_src1;
1606	int* dst = (int*)_dst;
1607	dst_step /= sizeof(dst[`0`]);
1608
1609	if( elem_size == sizeof(int) )
1610	{
1611	for( i = `0`; i < len; i++, dst += dst_step )
1612	{
1613	t0 = src0[i]; t1 = src1[i];
1614	dst[`0`] = t0; dst[`1`] = t1;
1615	}
1616	}
1617	else if( elem_size == sizeof(int)*`2` )
1618	{
1619	for( i = `0`; i < len*`2`; i += `2`, dst += dst_step )
1620	{
1621	t0 = src0[i]; t1 = src0[i+`1`];
1622	dst[`0`] = t0; dst[`1`] = t1;
1623	t0 = src1[i]; t1 = src1[i+`1`];
1624	dst[`2`] = t0; dst[`3`] = t1;
1625	}
1626	}
1627	else if( elem_size == sizeof(int)*`4` )
1628	{
1629	for( i = `0`; i < len*`4`; i += `4`, dst += dst_step )
1630	{
1631	t0 = src0[i]; t1 = src0[i+`1`];
1632	dst[`0`] = t0; dst[`1`] = t1;
1633	t0 = src0[i+`2`]; t1 = src0[i+`3`];
1634	dst[`2`] = t0; dst[`3`] = t1;
1635	t0 = src1[i]; t1 = src1[i+`1`];
1636	dst[`4`] = t0; dst[`5`] = t1;
1637	t0 = src1[i+`2`]; t1 = src1[i+`3`];
1638	dst[`6`] = t0; dst[`7`] = t1;
1639	}
1640	}
1641	}
1642
1643
1644	static void
1645	ExpandCCS( uchar* _ptr, int n, int elem_size )
1646	{
1647	int i;
1648	if( elem_size == (int)sizeof(float) )
1649	{
1650	float* p = (float*)_ptr;
1651	for( i = `1`; i < (n+`1`)/`2`; i++ )
1652	{
1653	p[(n-i)`2`] = p[i`2`-`1`];
1654	p[(n-i)`2`+`1`] = -p[i`2`];
1655	}
1656	if( (n & `1`) == `0` )
1657	{
1658	p[n] = p[n-`1`];
1659	p[n+`1`] = `0.f`;
1660	n--;
1661	}
1662	for( i = n-`1`; i > `0`; i-- )
1663	p[i+`1`] = p[i];
1664	p[`1`] = `0.f`;
1665	}
1666	else
1667	{
1668	double* p = (double*)_ptr;
1669	for( i = `1`; i < (n+`1`)/`2`; i++ )
1670	{
1671	p[(n-i)`2`] = p[i`2`-`1`];
1672	p[(n-i)`2`+`1`] = -p[i`2`];
1673	}
1674	if( (n & `1`) == `0` )
1675	{
1676	p[n] = p[n-`1`];
1677	p[n+`1`] = `0.f`;
1678	n--;
1679	}
1680	for( i = n-`1`; i > `0`; i-- )
1681	p[i+`1`] = p[i];
1682	p[`1`] = `0.f`;
1683	}
1684	}
1685
1686	static void DFT_32f(const OcvDftOptions & c, const Complexf* src, Complexf* dst)
1687	{
1688	DFT(c, src, dst);
1689	}
1690
1691	static void DFT_64f(const OcvDftOptions & c, const Complexd* src, Complexd* dst)
1692	{
1693	DFT(c, src, dst);
1694	}
1695
1696
1697	static void RealDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1698	{
1699	RealDFT(c, src, dst);
1700	}
1701
1702	static void RealDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1703	{
1704	RealDFT(c, src, dst);
1705	}
1706
1707	static void CCSIDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1708	{
1709	CCSIDFT(c, src, dst);
1710	}
1711
1712	static void CCSIDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1713	{
1714	CCSIDFT(c, src, dst);
1715	}
1716
1717	}
1718
1719	#ifdef USE_IPP_DFT
1720	typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int, int*, int**);
1721	typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void, uchar);
1722	#endif
1723
1724	namespace cv
1725	{
1726	#if defined USE_IPP_DFT
1727
1728	typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc, int, Ipp32fc, int, const IppiDFTSpec_C_32fc, Ipp8u);
1729	typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*);
1730
1731	template <typename Dft>
1732	class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
1733	{
1734	public:
1735
1736	Dft_C_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1737	const Dft& _ippidft, int _norm_flag, bool *_ok) :
1738	ParallelLoopBody(),
1739	src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1740	ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1741	{
1742	ok = true*;
1743	}
1744
1745	virtual void operator()(const Range& range) const CV_OVERRIDE
1746	{
1747	IppStatus status;
1748	Ipp8u* pBuffer = `0`;
1749	Ipp8u* pMemInit= `0`;
1750	int sizeBuffer=`0`;
1751	int sizeSpec=`0`;
1752	int sizeInit=`0`;
1753
1754	IppiSize srcRoiSize = {.width: width, .height: `1`};
1755
1756	status = ippiDFTGetSize_C_32fc(roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pSizeSpec: &sizeSpec, pSizeInit: &sizeInit, pSizeBuf: &sizeBuffer );
1757	if ( status < `0` )
1758	{
1759	ok = false*;
1760	return;
1761	}
1762
1763	IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1764
1765	if ( sizeInit > `0` )
1766	pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1767
1768	if ( sizeBuffer > `0` )
1769	pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1770
1771	status = ippiDFTInit_C_32fc( roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pDFTSpec, pMemInit );
1772
1773	if ( sizeInit > `0` )
1774	ippFree( ptr: pMemInit );
1775
1776	if ( status < `0` )
1777	{
1778	ippFree( ptr: pDFTSpec );
1779	if ( sizeBuffer > `0` )
1780	ippFree( ptr: pBuffer );
1781	ok = false*;
1782	return;
1783	}
1784
1785	for( int i = range.start; i < range.end; ++i)
1786	if(!ippidft((Ipp32fc)(src + src_step i), src_step, (Ipp32fc)(dst + dst_step i), dst_step,
1787	pDFTSpec, (Ipp8u*)pBuffer))
1788	{
1789	ok = false*;
1790	}
1791
1792	if ( sizeBuffer > `0` )
1793	ippFree( ptr: pBuffer );
1794
1795	ippFree( ptr: pDFTSpec );
1796	CV_IMPL_ADD(CV_IMPL_IPP\|CV_IMPL_MT);
1797	}
1798
1799	private:
1800	const uchar * src;
1801	size_t src_step;
1802	uchar * dst;
1803	size_t dst_step;
1804	int width;
1805	const Dft& ippidft;
1806	int norm_flag;
1807	bool *ok;
1808
1809	const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&);
1810	};
1811
1812	template <typename Dft>
1813	class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
1814	{
1815	public:
1816
1817	Dft_R_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1818	const Dft& _ippidft, int _norm_flag, bool *_ok) :
1819	ParallelLoopBody(),
1820	src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1821	ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1822	{
1823	ok = true*;
1824	}
1825
1826	virtual void operator()(const Range& range) const CV_OVERRIDE
1827	{
1828	IppStatus status;
1829	Ipp8u* pBuffer = `0`;
1830	Ipp8u* pMemInit= `0`;
1831	int sizeBuffer=`0`;
1832	int sizeSpec=`0`;
1833	int sizeInit=`0`;
1834
1835	IppiSize srcRoiSize = {.width: width, .height: `1`};
1836
1837	status = ippiDFTGetSize_R_32f(roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pSizeSpec: &sizeSpec, pSizeInit: &sizeInit, pSizeBuf: &sizeBuffer );
1838	if ( status < `0` )
1839	{
1840	ok = false*;
1841	return;
1842	}
1843
1844	IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
1845
1846	if ( sizeInit > `0` )
1847	pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1848
1849	if ( sizeBuffer > `0` )
1850	pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1851
1852	status = ippiDFTInit_R_32f( roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pDFTSpec, pMemInit );
1853
1854	if ( sizeInit > `0` )
1855	ippFree( ptr: pMemInit );
1856
1857	if ( status < `0` )
1858	{
1859	ippFree( ptr: pDFTSpec );
1860	if ( sizeBuffer > `0` )
1861	ippFree( ptr: pBuffer );
1862	ok = false*;
1863	return;
1864	}
1865
1866	for( int i = range.start; i < range.end; ++i)
1867	if(!ippidft((float)(src + src_step i), src_step, (float)(dst + dst_step i), dst_step,
1868	pDFTSpec, (Ipp8u*)pBuffer))
1869	{
1870	ok = false*;
1871	}
1872
1873	if ( sizeBuffer > `0` )
1874	ippFree( ptr: pBuffer );
1875
1876	ippFree( ptr: pDFTSpec );
1877	CV_IMPL_ADD(CV_IMPL_IPP\|CV_IMPL_MT);
1878	}
1879
1880	private:
1881	const uchar * src;
1882	size_t src_step;
1883	uchar * dst;
1884	size_t dst_step;
1885	int width;
1886	const Dft& ippidft;
1887	int norm_flag;
1888	bool *ok;
1889
1890	const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&);
1891	};
1892
1893	template <typename Dft>
1894	bool Dft_C_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1895	{
1896	bool ok;
1897	parallel_for_(Range (`0`, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(`1`<<`16`) );
1898	return ok;
1899	}
1900
1901	template <typename Dft>
1902	bool Dft_R_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1903	{
1904	bool ok;
1905	parallel_for_(Range (`0`, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(`1`<<`16`) );
1906	return ok;
1907	}
1908
1909	struct IPPDFT_C_Functor
1910	{
1911	IPPDFT_C_Functor(ippiDFT_C_Func _func) : ippiDFT_CToC_32fc_C1R(_func){}
1912
1913	bool operator()(const Ipp32fc* src, size_t srcStep, Ipp32fc* dst, size_t dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const
1914	{
1915	return ippiDFT_CToC_32fc_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_CToC_32fc_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= `0` : false;
1916	}
1917	private:
1918	ippiDFT_C_Func ippiDFT_CToC_32fc_C1R;
1919	};
1920
1921	struct IPPDFT_R_Functor
1922	{
1923	IPPDFT_R_Functor(ippiDFT_R_Func _func) : ippiDFT_PackToR_32f_C1R(_func){}
1924
1925	bool operator()(const Ipp32f* src, size_t srcStep, Ipp32f* dst, size_t dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const
1926	{
1927	return ippiDFT_PackToR_32f_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_PackToR_32f_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= `0` : false;
1928	}
1929	private:
1930	ippiDFT_R_Func ippiDFT_PackToR_32f_C1R;
1931	};
1932
1933	static bool ippi_DFT_C_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1934	{
1935	CV_INSTRUMENT_REGION_IPP();
1936
1937	IppStatus status;
1938	Ipp8u* pBuffer = `0`;
1939	Ipp8u* pMemInit= `0`;
1940	int sizeBuffer=`0`;
1941	int sizeSpec=`0`;
1942	int sizeInit=`0`;
1943
1944	IppiSize srcRoiSize = {.width: width, .height: height};
1945
1946	status = ippiDFTGetSize_C_32fc(roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pSizeSpec: &sizeSpec, pSizeInit: &sizeInit, pSizeBuf: &sizeBuffer );
1947	if ( status < `0` )
1948	return false;
1949
1950	IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1951
1952	if ( sizeInit > `0` )
1953	pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1954
1955	if ( sizeBuffer > `0` )
1956	pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1957
1958	status = ippiDFTInit_C_32fc( roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pDFTSpec, pMemInit );
1959
1960	if ( sizeInit > `0` )
1961	ippFree( ptr: pMemInit );
1962
1963	if ( status < `0` )
1964	{
1965	ippFree( ptr: pDFTSpec );
1966	if ( sizeBuffer > `0` )
1967	ippFree( ptr: pBuffer );
1968	return false;
1969	}
1970
1971	if (!inv)
1972	status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_CToC_32fc_C1R, (Ipp32fc)src, static_cast<int>(src_step), (Ipp32fc)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1973	else
1974	status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_CToC_32fc_C1R, (Ipp32fc)src, static_cast<int>(src_step), (Ipp32fc)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1975
1976	if ( sizeBuffer > `0` )
1977	ippFree( ptr: pBuffer );
1978
1979	ippFree( ptr: pDFTSpec );
1980
1981	if(status >= `0`)
1982	{
1983	CV_IMPL_ADD(CV_IMPL_IPP);
1984	return true;
1985	}
1986	return false;
1987	}
1988
1989	static bool ippi_DFT_R_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1990	{
1991	CV_INSTRUMENT_REGION_IPP();
1992
1993	IppStatus status;
1994	Ipp8u* pBuffer = `0`;
1995	Ipp8u* pMemInit= `0`;
1996	int sizeBuffer=`0`;
1997	int sizeSpec=`0`;
1998	int sizeInit=`0`;
1999
2000	IppiSize srcRoiSize = {.width: width, .height: height};
2001
2002	status = ippiDFTGetSize_R_32f(roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pSizeSpec: &sizeSpec, pSizeInit: &sizeInit, pSizeBuf: &sizeBuffer );
2003	if ( status < `0` )
2004	return false;
2005
2006	IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
2007
2008	if ( sizeInit > `0` )
2009	pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
2010
2011	if ( sizeBuffer > `0` )
2012	pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
2013
2014	status = ippiDFTInit_R_32f( roiSize: srcRoiSize, flag: norm_flag, hint: ippAlgHintNone, pDFTSpec, pMemInit );
2015
2016	if ( sizeInit > `0` )
2017	ippFree( ptr: pMemInit );
2018
2019	if ( status < `0` )
2020	{
2021	ippFree( ptr: pDFTSpec );
2022	if ( sizeBuffer > `0` )
2023	ippFree( ptr: pBuffer );
2024	return false;
2025	}
2026
2027	if (!inv)
2028	status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_RToPack_32f_C1R, (float)src, static_cast<int>(src_step), (float)dst, static_cast*<int*>(dst_step), pDFTSpec, pBuffer);
2029	else
2030	status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_PackToR_32f_C1R, (float)src, static_cast<int>(src_step), (float)dst, static_cast*<int*>(dst_step), pDFTSpec, pBuffer);
2031
2032	if ( sizeBuffer > `0` )
2033	ippFree( ptr: pBuffer );
2034
2035	ippFree( ptr: pDFTSpec );
2036
2037	if(status >= `0`)
2038	{
2039	CV_IMPL_ADD(CV_IMPL_IPP);
2040	return true;
2041	}
2042	return false;
2043	}
2044
2045	#endif
2046	}
2047
2048	#ifdef HAVE_OPENCL
2049
2050	namespace cv
2051	{
2052
2053	enum FftType
2054	{
2055	R2R = `0`, // real to CCS in case forward transform, CCS to real otherwise
2056	C2R = `1`, // complex to real in case inverse transform
2057	R2C = `2`, // real to complex in case forward transform
2058	C2C = `3` // complex to complex
2059	};
2060
2061	struct OCL_FftPlan
2062	{
2063	private:
2064	UMat twiddles;
2065	String buildOptions;
2066	int thread_count;
2067	int dft_size;
2068	int dft_depth;
2069	bool status;
2070
2071	public:
2072	OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true)
2073	{
2074	CV_Assert( dft_depth == CV_32F \|\| dft_depth == CV_64F );
2075
2076	int min_radix;
2077	std::vector<int> radixes, blocks;
2078	ocl_getRadixes(cols: dft_size, radixes, blocks, min_radix);
2079	thread_count = dft_size / min_radix;
2080
2081	if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize())
2082	{
2083	status = false;
2084	return;
2085	}
2086
2087	// generate string with radix calls
2088	String radix_processing;
2089	int n = `1`, twiddle_size = `0`;
2090	for (size_t i=`0`; i<radixes.size(); i++)
2091	{
2092	int radix = radixes [i], block = blocks [i];
2093	if (block > `1`)
2094	radix_processing += format(fmt: "fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix);
2095	else
2096	radix_processing += format(fmt: "fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix);
2097	twiddle_size += (radix-`1`)*n;
2098	n *= radix;
2099	}
2100
2101	twiddles.create(rows: `1`, cols: twiddle_size, CV_MAKE_TYPE(dft_depth, `2`));
2102	if (dft_depth == CV_32F)
2103	fillRadixTable<float>(twiddles, radixes);
2104	else
2105	fillRadixTable<double>(twiddles, radixes);
2106
2107	buildOptions = format(fmt: "-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s",
2108	dft_size, min_radix, ocl::typeToStr(t: dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, `2`)),
2109	dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str());
2110	}
2111
2112	bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const
2113	{
2114	if (!status)
2115	return false;
2116
2117	UMat src = _src.getUMat();
2118	UMat dst = _dst.getUMat();
2119
2120	size_t globalsize[`2`];
2121	size_t localsize[`2`];
2122	String kernel_name;
2123
2124	bool is1d = (flags & DFT_ROWS) != `0` \|\| num_dfts == `1`;
2125	bool inv = (flags & DFT_INVERSE) != `0`;
2126	String options = buildOptions;
2127
2128	if (rows)
2129	{
2130	globalsize[`0`] = thread_count; globalsize[`1`] = src.rows;
2131	localsize[`0`] = thread_count; localsize[`1`] = `1`;
2132	kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
2133	if ((is1d \|\| inv) && (flags & DFT_SCALE))
2134	options += " -D DFT_SCALE";
2135	}
2136	else
2137	{
2138	globalsize[`0`] = num_dfts; globalsize[`1`] = thread_count;
2139	localsize[`0`] = `1`; localsize[`1`] = thread_count;
2140	kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols";
2141	if (flags & DFT_SCALE)
2142	options += " -D DFT_SCALE";
2143	}
2144
2145	options += src.channels() == `1` ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
2146	options += dst.channels() == `1` ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
2147	options += is1d ? " -D IS_1D" : "";
2148
2149	if (!inv)
2150	{
2151	if ((is1d && src.channels() == `1`) \|\| (rows && (fftType == R2R)))
2152	options += " -D NO_CONJUGATE";
2153	}
2154	else
2155	{
2156	if (rows && (fftType == C2R \|\| fftType == R2R))
2157	options += " -D NO_CONJUGATE";
2158	if (dst.cols % `2` == `0`)
2159	options += " -D EVEN";
2160	}
2161
2162	ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
2163	if (k.empty())
2164	return false;
2165
2166	k.args(kernel_args: ocl::KernelArg::ReadOnly(m: src), kernel_args: ocl::KernelArg::WriteOnly(m: dst), kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: twiddles), kernel_args: thread_count, kernel_args: num_dfts);
2167	return k.run(dims: `2`, globalsize, localsize, sync: false);
2168	}
2169
2170	private:
2171	static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
2172	{
2173	int factors[`34`];
2174	int nf = DFTFactorize(n: cols, factors);
2175
2176	int n = `1`;
2177	int factor_index = `0`;
2178	min_radix = INT_MAX;
2179
2180	// 2^n transforms
2181	if ((factors[factor_index] & `1`) == `0`)
2182	{
2183	for( ; n < factors[factor_index];)
2184	{
2185	int radix = `2`, block = `1`;
2186	if (`8`*n <= factors[`0`])
2187	radix = `8`;
2188	else if (`4`*n <= factors[`0`])
2189	{
2190	radix = `4`;
2191	if (cols % `12` == `0`)
2192	block = `3`;
2193	else if (cols % `8` == `0`)
2194	block = `2`;
2195	}
2196	else
2197	{
2198	if (cols % `10` == `0`)
2199	block = `5`;
2200	else if (cols % `8` == `0`)
2201	block = `4`;
2202	else if (cols % `6` == `0`)
2203	block = `3`;
2204	else if (cols % `4` == `0`)
2205	block = `2`;
2206	}
2207
2208	radixes.push_back(x: radix);
2209	blocks.push_back(x: block);
2210	min_radix = min(a: min_radix, b: block*radix);
2211	n *= radix;
2212	}
2213	factor_index++;
2214	}
2215
2216	// all the other transforms
2217	for( ; factor_index < nf; factor_index++)
2218	{
2219	int radix = factors[factor_index], block = `1`;
2220	if (radix == `3`)
2221	{
2222	if (cols % `12` == `0`)
2223	block = `4`;
2224	else if (cols % `9` == `0`)
2225	block = `3`;
2226	else if (cols % `6` == `0`)
2227	block = `2`;
2228	}
2229	else if (radix == `5`)
2230	{
2231	if (cols % `10` == `0`)
2232	block = `2`;
2233	}
2234	radixes.push_back(x: radix);
2235	blocks.push_back(x: block);
2236	min_radix = min(a: min_radix, b: block*radix);
2237	}
2238	}
2239
2240	template <typename T>
2241	static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes)
2242	{
2243	Mat tw = twiddles.getMat(flags: ACCESS_WRITE);
2244	T* ptr = tw.ptr<T>();
2245	int ptr_index = `0`;
2246
2247	int n = `1`;
2248	for (size_t i=`0`; i<radixes.size(); i++)
2249	{
2250	int radix = radixes [i];
2251	n *= radix;
2252
2253	for (int j=`1`; j<radix; j++)
2254	{
2255	double theta = -CV_2PI*j/n;
2256
2257	for (int k=`0`; k<(n/radix); k++)
2258	{
2259	ptr[ptr_index++] = (T) cos(x: k*theta);
2260	ptr[ptr_index++] = (T) sin(x: k*theta);
2261	}
2262	}
2263	}
2264	}
2265	};
2266
2267	class OCL_FftPlanCache
2268	{
2269	public:
2270	static OCL_FftPlanCache & getInstance()
2271	{
2272	CV_SINGLETON_LAZY_INIT_REF(OCL_FftPlanCache, new OCL_FftPlanCache ())
2273	}
2274
2275	Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth)
2276	{
2277	int key = (dft_size << `16`) \| (depth & `0xFFFF`);
2278	std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(x: key);
2279	if (f != planStorage.end())
2280	{
2281	return f ->second;
2282	}
2283	else
2284	{
2285	Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan (dft_size, depth));
2286	planStorage [key] = newPlan;
2287	return newPlan;
2288	}
2289	}
2290
2291	~OCL_FftPlanCache()
2292	{
2293	planStorage.clear();
2294	}
2295
2296	protected:
2297	OCL_FftPlanCache() :
2298	planStorage ()
2299	{
2300	}
2301	std::map<int, Ptr<OCL_FftPlan> > planStorage;
2302	};
2303
2304	static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
2305	{
2306	int type = _src.type(), depth = CV_MAT_DEPTH(type);
2307	Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(dft_size: _src.cols(), depth);
2308	return plan ->enqueueTransform(_src, _dst, num_dfts: nonzero_rows, flags, fftType, rows: true);
2309	}
2310
2311	static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
2312	{
2313	int type = _src.type(), depth = CV_MAT_DEPTH(type);
2314	Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(dft_size: _src.rows(), depth);
2315	return plan ->enqueueTransform(_src, _dst, num_dfts: nonzero_cols, flags, fftType, rows: false);
2316	}
2317
2318	inline FftType determineFFTType(bool real_input, bool complex_input, bool real_output, bool complex_output, bool inv)
2319	{
2320	// output format is not specified
2321	if (!real_output && !complex_output)
2322	complex_output = true;
2323
2324	// input or output format is ambiguous
2325	if (real_input == complex_input \|\| real_output == complex_output)
2326	CV_Error(Error::StsBadArg, "Invalid FFT input or output format");
2327
2328	FftType result = real_input ? (real_output ? R2R : R2C) : (real_output ? C2R : C2C);
2329
2330	// Forward Complex to CCS not supported
2331	if (result == C2R && !inv)
2332	result = C2C;
2333
2334	// Inverse CCS to Complex not supported
2335	if (result == R2C && inv)
2336	result = R2R;
2337
2338	return result;
2339	}
2340
2341	static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
2342	{
2343	int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
2344	Size ssize = _src.size();
2345	bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > `0`;
2346
2347	if (!(cn == `1` \|\| cn == `2`)
2348	\|\| !(depth == CV_32F \|\| (depth == CV_64F && doubleSupport))
2349	\|\| ((flags & DFT_REAL_OUTPUT) && (flags & DFT_COMPLEX_OUTPUT)))
2350	return false;
2351
2352	// if is not a multiplication of prime numbers { 2, 3, 5 }
2353	if (ssize.area() != getOptimalDFTSize(vecsize: ssize.area()))
2354	return false;
2355
2356	UMat src = _src.getUMat();
2357	bool inv = (flags & DFT_INVERSE) != `0` ? `1` : `0`;
2358
2359	if( nonzero_rows <= `0` \|\| nonzero_rows > _src.rows() )
2360	nonzero_rows = _src.rows();
2361	bool is1d = (flags & DFT_ROWS) != `0` \|\| nonzero_rows == `1`;
2362
2363	FftType fftType = determineFFTType(real_input: cn == `1`, complex_input: cn == `2`,
2364	real_output: (flags & DFT_REAL_OUTPUT) != `0`, complex_output: (flags & DFT_COMPLEX_OUTPUT) != `0`, inv);
2365
2366	UMat output;
2367	if (fftType == C2C \|\| fftType == R2C)
2368	{
2369	// complex output
2370	_dst.create(sz: src.size (), CV_MAKETYPE(depth, `2`));
2371	output = _dst.getUMat();
2372	}
2373	else
2374	{
2375	// real output
2376	if (is1d)
2377	{
2378	_dst.create(sz: src.size (), CV_MAKETYPE(depth, `1`));
2379	output = _dst.getUMat();
2380	}
2381	else
2382	{
2383	_dst.create(sz: src.size (), CV_MAKETYPE(depth, `1`));
2384	output.create(size: src.size (), CV_MAKETYPE(depth, `2`));
2385	}
2386	}
2387
2388	bool result = false;
2389	if (!inv)
2390	{
2391	int nonzero_cols = fftType == R2R ? output.cols/`2` + `1` : output.cols;
2392	result = ocl_dft_rows(src: src, dst: output, nonzero_rows, flags, fftType);
2393	if (!is1d)
2394	result = result && ocl_dft_cols(src: output, _dst, nonzero_cols, flags, fftType);
2395	}
2396	else
2397	{
2398	if (fftType == C2C)
2399	{
2400	// complex output
2401	result = ocl_dft_rows(src: src, dst: output, nonzero_rows, flags, fftType);
2402	if (!is1d)
2403	result = result && ocl_dft_cols(src: output, dst: output, nonzero_cols: output.cols, flags, fftType);
2404	}
2405	else
2406	{
2407	if (is1d)
2408	{
2409	result = ocl_dft_rows(src: src, dst: output, nonzero_rows, flags, fftType);
2410	}
2411	else
2412	{
2413	int nonzero_cols = src.cols/`2` + `1`;
2414	result = ocl_dft_cols(src: src, dst: output, nonzero_cols, flags, fftType);
2415	result = result && ocl_dft_rows(src: output, _dst, nonzero_rows, flags, fftType);
2416	}
2417	}
2418	}
2419	return result;
2420	}
2421
2422	} // namespace cv;
2423
2424	#endif
2425
2426	#ifdef HAVE_CLAMDFFT
2427
2428	namespace cv {
2429
2430	#define CLAMDDFT_Assert(func) \
2431	{ \
2432	clfftStatus s = (func); \
2433	CV_Assert(s == CLFFT_SUCCESS); \
2434	}
2435
2436	class PlanCache
2437	{
2438	struct FftPlan
2439	{
2440	FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
2441	dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
2442	doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType),
2443	context((cl_context)ocl::Context::getDefault().ptr()), plHandle(`0`)
2444	{
2445	bool dft_inverse = (flags & DFT_INVERSE) != `0`;
2446	bool dft_scale = (flags & DFT_SCALE) != `0`;
2447	bool dft_rows = (flags & DFT_ROWS) != `0`;
2448
2449	clfftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
2450	clfftDim dim = dft_size.height == `1` \|\| dft_rows ? CLFFT_1D : CLFFT_2D;
2451
2452	size_t batchSize = dft_rows ? dft_size.height : `1`;
2453	size_t clLengthsIn[`3`] = { (size_t)dft_size.width, dft_rows ? `1` : (size_t)dft_size.height, `1` };
2454	size_t clStridesIn[`3`] = { `1`, `1`, `1` };
2455	size_t clStridesOut[`3`] = { `1`, `1`, `1` };
2456	int elemSize = doubleFP ? sizeof(double) : sizeof(float);
2457
2458	switch (fftType)
2459	{
2460	case C2C:
2461	inLayout = CLFFT_COMPLEX_INTERLEAVED;
2462	outLayout = CLFFT_COMPLEX_INTERLEAVED;
2463	clStridesIn[`1`] = src_step / (elemSize << `1`);
2464	clStridesOut[`1`] = dst_step / (elemSize << `1`);
2465	break;
2466	case R2C:
2467	inLayout = CLFFT_REAL;
2468	outLayout = CLFFT_HERMITIAN_INTERLEAVED;
2469	clStridesIn[`1`] = src_step / elemSize;
2470	clStridesOut[`1`] = dst_step / (elemSize << `1`);
2471	break;
2472	case C2R:
2473	inLayout = CLFFT_HERMITIAN_INTERLEAVED;
2474	outLayout = CLFFT_REAL;
2475	clStridesIn[`1`] = src_step / (elemSize << `1`);
2476	clStridesOut[`1`] = dst_step / elemSize;
2477	break;
2478	case R2R:
2479	default:
2480	CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
2481	break;
2482	}
2483
2484	clStridesIn[`2`] = dft_rows ? clStridesIn[`1`] : dft_size.width * clStridesIn[`1`];
2485	clStridesOut[`2`] = dft_rows ? clStridesOut[`1`] : dft_size.width * clStridesOut[`1`];
2486
2487	CLAMDDFT_Assert(clfftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
2488
2489	// setting plan properties
2490	CLAMDDFT_Assert(clfftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
2491	CLAMDDFT_Assert(clfftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
2492	CLAMDDFT_Assert(clfftSetLayout(plHandle, inLayout, outLayout))
2493	CLAMDDFT_Assert(clfftSetPlanBatchSize(plHandle, batchSize))
2494	CLAMDDFT_Assert(clfftSetPlanInStride(plHandle, dim, clStridesIn))
2495	CLAMDDFT_Assert(clfftSetPlanOutStride(plHandle, dim, clStridesOut))
2496	CLAMDDFT_Assert(clfftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
2497
2498	float scale = dft_scale ? `1.0f` / (dft_rows ? dft_size.width : dft_size.area()) : `1.0f`;
2499	CLAMDDFT_Assert(clfftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
2500
2501	// ready to bake
2502	cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2503	CLAMDDFT_Assert(clfftBakePlan(plHandle, `1`, &queue, NULL, NULL))
2504	}
2505
2506	~FftPlan()
2507	{
2508	// Do not tear down clFFT.
2509	// The user application may still use clFFT even after OpenCV is unloaded.
2510	/clfftDestroyPlan(&plHandle);/
2511	}
2512
2513	friend class PlanCache;
2514
2515	private:
2516	Size dft_size;
2517	int src_step, dst_step;
2518	bool doubleFP;
2519	bool inplace;
2520	int flags;
2521	FftType fftType;
2522
2523	cl_context context;
2524	clfftPlanHandle plHandle;
2525	};
2526
2527	public:
2528	static PlanCache & getInstance()
2529	{
2530	CV_SINGLETON_LAZY_INIT_REF(PlanCache, new PlanCache())
2531	}
2532
2533	clfftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
2534	bool inplace, int flags, FftType fftType)
2535	{
2536	cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();
2537
2538	for (size_t i = `0`, size = planStorage.size(); i < size; ++i)
2539	{
2540	const FftPlan * const plan = planStorage[i];
2541
2542	if (plan->dft_size == dft_size &&
2543	plan->flags == flags &&
2544	plan->src_step == src_step &&
2545	plan->dst_step == dst_step &&
2546	plan->doubleFP == doubleFP &&
2547	plan->fftType == fftType &&
2548	plan->inplace == inplace)
2549	{
2550	if (plan->context != currentContext)
2551	{
2552	planStorage.erase(planStorage.begin() + i);
2553	break;
2554	}
2555
2556	return plan->plHandle;
2557	}
2558	}
2559
2560	// no baked plan is found, so let's create a new one
2561	Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType));
2562	planStorage.push_back(newPlan);
2563
2564	return newPlan->plHandle;
2565	}
2566
2567	~PlanCache()
2568	{
2569	planStorage.clear();
2570	}
2571
2572	protected:
2573	PlanCache() :
2574	planStorage()
2575	{
2576	}
2577
2578	std::vector<Ptr<FftPlan> > planStorage;
2579	};
2580
2581	extern "C" {
2582
2583	static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2584	{
2585	UMatData * u = (UMatData *)p;
2586
2587	if( u && CV_XADD(&u->urefcount, -`1`) == `1` )
2588	u->currAllocator->deallocate(u);
2589	u = `0`;
2590
2591	clReleaseEvent(e), e = `0`;
2592	}
2593
2594	}
2595
2596	static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
2597	{
2598	int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2599	Size ssize = _src.size();
2600
2601	bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > `0`;
2602	if ( (!doubleSupport && depth == CV_64F) \|\|
2603	!(type == CV_32FC1 \|\| type == CV_32FC2 \|\| type == CV_64FC1 \|\| type == CV_64FC2) \|\|
2604	_src.offset() != `0`)
2605	return false;
2606
2607	// if is not a multiplication of prime numbers { 2, 3, 5 }
2608	if (ssize.area() != getOptimalDFTSize(ssize.area()))
2609	return false;
2610
2611	int dst_complex_input = cn == `2` ? `1` : `0`;
2612	bool dft_inverse = (flags & DFT_INVERSE) != `0` ? `1` : `0`;
2613	int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != `0`;
2614	bool dft_real_output = (flags & DFT_REAL_OUTPUT) != `0`;
2615
2616	CV_Assert(dft_complex_output + dft_real_output < `2`);
2617	FftType fftType = (FftType)(dst_complex_input << `0` \| dft_complex_output << `1`);
2618
2619	switch (fftType)
2620	{
2621	case C2C:
2622	_dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, `2`));
2623	break;
2624	case R2C: // TODO implement it if possible
2625	case C2R: // TODO implement it if possible
2626	case R2R: // AMD Fft does not support this type
2627	default:
2628	return false;
2629	}
2630
2631	UMat src = _src.getUMat(), dst = _dst.getUMat();
2632	bool inplace = src.u == dst.u;
2633
2634	clfftPlanHandle plHandle = PlanCache::getInstance().
2635	getPlanHandle(ssize, (int)src.step, (int)dst.step,
2636	depth == CV_64F, inplace, flags, fftType);
2637
2638	// get the bufferSize
2639	size_t bufferSize = `0`;
2640	CLAMDDFT_Assert(clfftGetTmpBufSize(plHandle, &bufferSize))
2641	UMat tmpBuffer(`1`, (int)bufferSize, CV_8UC1);
2642
2643	cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
2644	cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
2645
2646	cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2647	cl_event e = `0`;
2648
2649	CLAMDDFT_Assert(clfftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
2650	`1`, &queue, `0`, NULL, &e,
2651	&srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
2652
2653	tmpBuffer.addref();
2654	clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
2655	return true;
2656	}
2657
2658	#undef DFT_ASSERT
2659
2660	}
2661
2662	#endif // HAVE_CLAMDFFT
2663
2664	namespace cv
2665	{
2666
2667	template <typename T>
2668	static void complementComplex(T * ptr, size_t step, int n, int len, int dft_dims)
2669	{
2670	T* p0 = (T*)ptr;
2671	size_t dstep = step/sizeof(p0[`0`]);
2672	for(int i = `0`; i < len; i++ )
2673	{
2674	T* p = p0 + dstep*i;
2675	T* q = dft_dims == `1` \|\| i == `0` \|\| i`2` == len ? p : p0 + dstep(len-i);
2676
2677	for( int j = `1`; j < (n+`1`)/`2`; j++ )
2678	{
2679	p[(n-j)`2`] = q[j`2`];
2680	p[(n-j)`2`+`1`] = -q[j`2`+`1`];
2681	}
2682	}
2683	}
2684
2685	static void complementComplexOutput(int depth, uchar * ptr, size_t step, int count, int len, int dft_dims)
2686	{
2687	if( depth == CV_32F )
2688	complementComplex(ptr: (float*)ptr, step, n: count, len, dft_dims);
2689	else
2690	complementComplex(ptr: (double*)ptr, step, n: count, len, dft_dims);
2691	}
2692
2693	enum DftMode {
2694	InvalidDft = `0`,
2695	FwdRealToCCS,
2696	FwdRealToComplex,
2697	FwdComplexToComplex,
2698	InvCCSToReal,
2699	InvComplexToReal,
2700	InvComplexToComplex,
2701	};
2702
2703	enum DftDims {
2704	InvalidDim = `0`,
2705	OneDim,
2706	OneDimColWise,
2707	TwoDims
2708	};
2709
2710	inline const char * modeName(DftMode m)
2711	{
2712	switch (m)
2713	{
2714	case InvalidDft: return "InvalidDft";
2715	case FwdRealToCCS: return "FwdRealToCCS";
2716	case FwdRealToComplex: return "FwdRealToComplex";
2717	case FwdComplexToComplex: return "FwdComplexToComplex";
2718	case InvCCSToReal: return "InvCCSToReal";
2719	case InvComplexToReal: return "InvComplexToReal";
2720	case InvComplexToComplex: return "InvComplexToComplex";
2721	}
2722	return `0`;
2723	}
2724
2725	inline const char * dimsName(DftDims d)
2726	{
2727	switch (d)
2728	{
2729	case InvalidDim: return "InvalidDim";
2730	case OneDim: return "OneDim";
2731	case OneDimColWise: return "OneDimColWise";
2732	case TwoDims: return "TwoDims";
2733	};
2734	return `0`;
2735	}
2736
2737	template <typename T>
2738	inline bool isInv(T mode)
2739	{
2740	switch ((DftMode)mode)
2741	{
2742	case InvCCSToReal:
2743	case InvComplexToReal:
2744	case InvComplexToComplex: return true;
2745	default: return false;
2746	}
2747	}
2748
2749	inline DftMode determineMode(bool inv, int cn1, int cn2)
2750	{
2751	if (!inv)
2752	{
2753	if (cn1 == `1` && cn2 == `1`)
2754	return FwdRealToCCS;
2755	else if (cn1 == `1` && cn2 == `2`)
2756	return FwdRealToComplex;
2757	else if (cn1 == `2` && cn2 == `2`)
2758	return FwdComplexToComplex;
2759	}
2760	else
2761	{
2762	if (cn1 == `1` && cn2 == `1`)
2763	return InvCCSToReal;
2764	else if (cn1 == `2` && cn2 == `1`)
2765	return InvComplexToReal;
2766	else if (cn1 == `2` && cn2 == `2`)
2767	return InvComplexToComplex;
2768	}
2769	return InvalidDft;
2770	}
2771
2772
2773	inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuous)
2774	{
2775	// printf("%d x %d (%d, %d)\n", rows, cols, isRowWise, isContinuous);
2776	if (isRowWise)
2777	return OneDim;
2778	if (cols == `1` && rows > `1`) // one-column-shaped input
2779	{
2780	if (isContinuous)
2781	return OneDim;
2782	else
2783	return OneDimColWise;
2784	}
2785	if (rows == `1`)
2786	return OneDim;
2787	if (cols > `1` && rows > `1`)
2788	return TwoDims;
2789	return InvalidDim;
2790	}
2791
2792	class OcvDftImpl CV_FINAL : public hal::DFT2D
2793	{
2794	protected:
2795	Ptr<hal::DFT1D> contextA;
2796	Ptr<hal::DFT1D> contextB;
2797	bool needBufferA;
2798	bool needBufferB;
2799	bool inv;
2800	int width;
2801	int height;
2802	DftMode mode;
2803	int elem_size;
2804	int complex_elem_size;
2805	int depth;
2806	bool real_transform;
2807	int nonzero_rows;
2808	bool isRowTransform;
2809	bool isScaled;
2810	std::vector<int> stages;
2811	bool useIpp;
2812	int src_channels;
2813	int dst_channels;
2814
2815	AutoBuffer<uchar> tmp_bufA;
2816	AutoBuffer<uchar> tmp_bufB;
2817	AutoBuffer<uchar> buf0;
2818	AutoBuffer<uchar> buf1;
2819
2820	public:
2821	OcvDftImpl()
2822	{
2823	needBufferA = false;
2824	needBufferB = false;
2825	inv = false;
2826	width = `0`;
2827	height = `0`;
2828	mode = InvalidDft;
2829	elem_size = `0`;
2830	complex_elem_size = `0`;
2831	depth = `0`;
2832	real_transform = false;
2833	nonzero_rows = `0`;
2834	isRowTransform = false;
2835	isScaled = false;
2836	useIpp = false;
2837	src_channels = `0`;
2838	dst_channels = `0`;
2839	}
2840
2841	void init(int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows)
2842	{
2843	bool isComplex = _src_channels != _dst_channels;
2844	nonzero_rows = _nonzero_rows;
2845	width = _width;
2846	height = _height;
2847	depth = _depth;
2848	src_channels = _src_channels;
2849	dst_channels = _dst_channels;
2850	bool isInverse = (flags & CV_HAL_DFT_INVERSE) != `0`;
2851	bool isInplace = (flags & CV_HAL_DFT_IS_INPLACE) != `0`;
2852	bool isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != `0`;
2853	mode = determineMode(inv: isInverse, cn1: _src_channels, cn2: _dst_channels);
2854	inv = isInverse;
2855	isRowTransform = (flags & CV_HAL_DFT_ROWS) != `0`;
2856	isScaled = (flags & CV_HAL_DFT_SCALE) != `0`;
2857	needBufferA = false;
2858	needBufferB = false;
2859	real_transform = (mode != FwdComplexToComplex && mode != InvComplexToComplex);
2860
2861	elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
2862	complex_elem_size = elem_size * `2`;
2863	if( !real_transform )
2864	elem_size = complex_elem_size;
2865
2866	#if defined USE_IPP_DFT
2867	CV_IPP_CHECK()
2868	{
2869	if (nonzero_rows == `0` && depth == CV_32F && ((width * height)>(int)(`1`<<`6`)))
2870	{
2871	if (mode == FwdComplexToComplex \|\| mode == InvComplexToComplex \|\| mode == FwdRealToCCS \|\| mode == InvCCSToReal)
2872	{
2873	useIpp = true;
2874	return;
2875	}
2876	}
2877	}
2878	#endif
2879
2880	DftDims dims = determineDims(rows: height, cols: width, isRowWise: isRowTransform, isContinuous);
2881	if (dims == TwoDims)
2882	{
2883	stages.resize(new_size: `2`);
2884	if (mode == InvCCSToReal \|\| mode == InvComplexToReal)
2885	{
2886	stages [`0`] = `1`;
2887	stages [`1`] = `0`;
2888	}
2889	else
2890	{
2891	stages [`0`] = `0`;
2892	stages [`1`] = `1`;
2893	}
2894	}
2895	else
2896	{
2897	stages.resize(new_size: `1`);
2898	if (dims == OneDimColWise)
2899	stages [`0`] = `1`;
2900	else
2901	stages [`0`] = `0`;
2902	}
2903
2904	for(uint stageIndex = `0`; stageIndex < stages.size(); ++stageIndex)
2905	{
2906	if (stageIndex == `1`)
2907	{
2908	isInplace = true;
2909	isComplex = false;
2910	}
2911
2912	int stage = stages [stageIndex];
2913	bool isLastStage = (stageIndex + `1` == stages.size());
2914
2915	int len, count;
2916
2917	int f = `0`;
2918	if (inv)
2919	f \|= CV_HAL_DFT_INVERSE;
2920	if (isScaled)
2921	f \|= CV_HAL_DFT_SCALE;
2922	if (isRowTransform)
2923	f \|= CV_HAL_DFT_ROWS;
2924	if (isComplex)
2925	f \|= CV_HAL_DFT_COMPLEX_OUTPUT;
2926	if (real_transform)
2927	f \|= CV_HAL_DFT_REAL_OUTPUT;
2928	if (!isLastStage)
2929	f \|= CV_HAL_DFT_TWO_STAGE;
2930
2931	if( stage == `0` ) // row-wise transform
2932	{
2933	if (width == `1` && !isRowTransform )
2934	{
2935	len = height;
2936	count = width;
2937	}
2938	else
2939	{
2940	len = width;
2941	count = height;
2942	}
2943	needBufferA = isInplace;
2944	contextA = hal::DFT1D::create(len, count, depth, flags: f, useBuffer: &needBufferA);
2945	if (needBufferA)
2946	tmp_bufA.allocate(size: len * complex_elem_size);
2947	}
2948	else
2949	{
2950	len = height;
2951	count = width;
2952	f \|= CV_HAL_DFT_STAGE_COLS;
2953	needBufferB = isInplace;
2954	contextB = hal::DFT1D::create(len, count, depth, flags: f, useBuffer: &needBufferB);
2955	if (needBufferB)
2956	tmp_bufB.allocate(size: len * complex_elem_size);
2957
2958	buf0.allocate(size: len * complex_elem_size);
2959	buf1.allocate(size: len * complex_elem_size);
2960	}
2961	}
2962	}
2963
2964	void apply(const uchar * src, size_t src_step, uchar * dst, size_t dst_step) CV_OVERRIDE
2965	{
2966	#if defined USE_IPP_DFT
2967	if (useIpp)
2968	{
2969	int ipp_norm_flag = !isScaled ? `8` : inv ? `2` : `1`;
2970	if (!isRowTransform)
2971	{
2972	if (mode == FwdComplexToComplex \|\| mode == InvComplexToComplex)
2973	{
2974	if (ippi_DFT_C_32F(src, src_step, dst, dst_step, width, height, inv, norm_flag: ipp_norm_flag))
2975	{
2976	CV_IMPL_ADD(CV_IMPL_IPP);
2977	return;
2978	}
2979	setIppErrorStatus();
2980	}
2981	else if (mode == FwdRealToCCS \|\| mode == InvCCSToReal)
2982	{
2983	if (ippi_DFT_R_32F(src, src_step, dst, dst_step, width, height, inv, norm_flag: ipp_norm_flag))
2984	{
2985	CV_IMPL_ADD(CV_IMPL_IPP);
2986	return;
2987	}
2988	setIppErrorStatus();
2989	}
2990	}
2991	else
2992	{
2993	if (mode == FwdComplexToComplex \|\| mode == InvComplexToComplex)
2994	{
2995	ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
2996	if (Dft_C_IPPLoop(src, src_step, dst, dst_step, width, height, ippidft: IPPDFT_C_Functor (ippiFunc),norm_flag: ipp_norm_flag))
2997	{
2998	CV_IMPL_ADD(CV_IMPL_IPP\|CV_IMPL_MT);
2999	return;
3000	}
3001	setIppErrorStatus();
3002	}
3003	else if (mode == FwdRealToCCS \|\| mode == InvCCSToReal)
3004	{
3005	ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
3006	if (Dft_R_IPPLoop(src, src_step, dst, dst_step, width, height, ippidft: IPPDFT_R_Functor (ippiFunc),norm_flag: ipp_norm_flag))
3007	{
3008	CV_IMPL_ADD(CV_IMPL_IPP\|CV_IMPL_MT);
3009	return;
3010	}
3011	setIppErrorStatus();
3012	}
3013	}
3014	return;
3015	}
3016	#endif
3017
3018	for(uint stageIndex = `0`; stageIndex < stages.size(); ++stageIndex)
3019	{
3020	int stage_src_channels = src_channels;
3021	int stage_dst_channels = dst_channels;
3022
3023	if (stageIndex == `1`)
3024	{
3025	src = dst;
3026	src_step = dst_step;
3027	stage_src_channels = stage_dst_channels;
3028	}
3029
3030	int stage = stages [stageIndex];
3031	bool isLastStage = (stageIndex + `1` == stages.size());
3032	bool isComplex = stage_src_channels != stage_dst_channels;
3033
3034	if( stage == `0` )
3035	rowDft(src_data: src, src_step, dst_data: dst, dst_step, isComplex, isLastStage);
3036	else
3037	colDft(src_data: src, src_step, dst_data: dst, dst_step, stage_src_channels, stage_dst_channels, isLastStage);
3038	}
3039	}
3040
3041	protected:
3042
3043	void rowDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, bool isComplex, bool isLastStage)
3044	{
3045	int len, count;
3046	if (width == `1` && !isRowTransform )
3047	{
3048	len = height;
3049	count = width;
3050	}
3051	else
3052	{
3053	len = width;
3054	count = height;
3055	}
3056	int dptr_offset = `0`;
3057	int dst_full_len = len*elem_size;
3058
3059	if( needBufferA )
3060	{
3061	if (mode == FwdRealToCCS && (len & `1`) && len > `1`)
3062	dptr_offset = elem_size;
3063	}
3064
3065	if( !inv && isComplex )
3066	dst_full_len += (len & `1`) ? elem_size : complex_elem_size;
3067
3068	int nz = nonzero_rows;
3069	if( nz <= `0` \|\| nz > count )
3070	nz = count;
3071
3072	int i;
3073	for( i = `0`; i < nz; i++ )
3074	{
3075	const uchar* sptr = src_data + src_step * i;
3076	uchar* dptr0 = dst_data + dst_step * i;
3077	uchar* dptr = dptr0;
3078
3079	if( needBufferA )
3080	dptr = tmp_bufA.data();
3081
3082	contextA ->apply(src: sptr, dst: dptr);
3083
3084	if( needBufferA )
3085	memcpy( dest: dptr0, src: dptr + dptr_offset, n: dst_full_len );
3086	}
3087
3088	for( ; i < count; i++ )
3089	{
3090	uchar* dptr0 = dst_data + dst_step * i;
3091	memset( s: dptr0, c: `0`, n: dst_full_len );
3092	}
3093	if(isLastStage && mode == FwdRealToComplex)
3094	complementComplexOutput(depth, ptr: dst_data, step: dst_step, count: len, len: nz, dft_dims: `1`);
3095	}
3096
3097	void colDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage)
3098	{
3099	int len = height;
3100	int count = width;
3101	int a = `0`, b = count;
3102	uchar dbuf0, dbuf1;
3103	const uchar* sptr0 = src_data;
3104	uchar* dptr0 = dst_data;
3105
3106	dbuf0 = buf0.data(), dbuf1 = buf1.data();
3107
3108	if( needBufferB )
3109	{
3110	dbuf1 = tmp_bufB.data();
3111	dbuf0 = buf1.data();
3112	}
3113
3114	if( real_transform )
3115	{
3116	int even;
3117	a = `1`;
3118	even = (count & `1`) == `0`;
3119	b = (count+`1`)/`2`;
3120	if( !inv )
3121	{
3122	memset( s: buf0.data(), c: `0`, n: len*complex_elem_size );
3123	CopyColumn( src: sptr0, src_step, dst: buf0.data(), dst_step: complex_elem_size, len, elem_size );
3124	sptr0 += stage_dst_channels*elem_size;
3125	if( even )
3126	{
3127	memset( s: buf1.data(), c: `0`, n: len*complex_elem_size );
3128	CopyColumn( src: sptr0 + (count-`2`)*elem_size, src_step,
3129	dst: buf1.data(), dst_step: complex_elem_size, len, elem_size );
3130	}
3131	}
3132	else if( stage_src_channels == `1` )
3133	{
3134	CopyColumn( src: sptr0, src_step, dst: buf0.data(), dst_step: elem_size, len, elem_size );
3135	ExpandCCS( ptr: buf0.data(), n: len, elem_size );
3136	if( even )
3137	{
3138	CopyColumn( src: sptr0 + (count-`1`)*elem_size, src_step,
3139	dst: buf1.data(), dst_step: elem_size, len, elem_size );
3140	ExpandCCS( ptr: buf1.data(), n: len, elem_size );
3141	}
3142	sptr0 += elem_size;
3143	}
3144	else
3145	{
3146	CopyColumn( src: sptr0, src_step, dst: buf0.data(), dst_step: complex_elem_size, len, elem_size: complex_elem_size );
3147	if( even )
3148	{
3149	CopyColumn( src: sptr0 + b*complex_elem_size, src_step,
3150	dst: buf1.data(), dst_step: complex_elem_size, len, elem_size: complex_elem_size );
3151	}
3152	sptr0 += complex_elem_size;
3153	}
3154
3155	if( even )
3156	contextB ->apply(src: buf1.data(), dst: dbuf1);
3157	contextB ->apply(src: buf0.data(), dst: dbuf0);
3158
3159	if( stage_dst_channels == `1` )
3160	{
3161	if( !inv )
3162	{
3163	// copy the half of output vector to the first/last column.
3164	// before doing that, defgragment the vector
3165	memcpy( dest: dbuf0 + elem_size, src: dbuf0, n: elem_size );
3166	CopyColumn( src: dbuf0 + elem_size, src_step: elem_size, dst: dptr0,
3167	dst_step, len, elem_size );
3168	if( even )
3169	{
3170	memcpy( dest: dbuf1 + elem_size, src: dbuf1, n: elem_size );
3171	CopyColumn( src: dbuf1 + elem_size, src_step: elem_size,
3172	dst: dptr0 + (count-`1`)*elem_size,
3173	dst_step, len, elem_size );
3174	}
3175	dptr0 += elem_size;
3176	}
3177	else
3178	{
3179	// copy the real part of the complex vector to the first/last column
3180	CopyColumn( src: dbuf0, src_step: complex_elem_size, dst: dptr0, dst_step, len, elem_size );
3181	if( even )
3182	CopyColumn( src: dbuf1, src_step: complex_elem_size, dst: dptr0 + (count-`1`)*elem_size,
3183	dst_step, len, elem_size );
3184	dptr0 += elem_size;
3185	}
3186	}
3187	else
3188	{
3189	CV_Assert( !inv );
3190	CopyColumn( src: dbuf0, src_step: complex_elem_size, dst: dptr0,
3191	dst_step, len, elem_size: complex_elem_size );
3192	if( even )
3193	CopyColumn( src: dbuf1, src_step: complex_elem_size,
3194	dst: dptr0 + b*complex_elem_size,
3195	dst_step, len, elem_size: complex_elem_size );
3196	dptr0 += complex_elem_size;
3197	}
3198	}
3199
3200	for(int i = a; i < b; i += `2` )
3201	{
3202	if( i+`1` < b )
3203	{
3204	CopyFrom2Columns( src: sptr0, src_step, dst0: buf0.data(), dst1: buf1.data(), len, elem_size: complex_elem_size );
3205	contextB ->apply(src: buf1.data(), dst: dbuf1);
3206	}
3207	else
3208	CopyColumn( src: sptr0, src_step, dst: buf0.data(), dst_step: complex_elem_size, len, elem_size: complex_elem_size );
3209
3210	contextB ->apply(src: buf0.data(), dst: dbuf0);
3211
3212	if( i+`1` < b )
3213	CopyTo2Columns( src0: dbuf0, src1: dbuf1, dst: dptr0, dst_step, len, elem_size: complex_elem_size );
3214	else
3215	CopyColumn( src: dbuf0, src_step: complex_elem_size, dst: dptr0, dst_step, len, elem_size: complex_elem_size );
3216	sptr0 += `2`*complex_elem_size;
3217	dptr0 += `2`*complex_elem_size;
3218	}
3219	if(isLastStage && mode == FwdRealToComplex)
3220	complementComplexOutput(depth, ptr: dst_data, step: dst_step, count, len, dft_dims: `2`);
3221	}
3222	};
3223
3224	class OcvDftBasicImpl CV_FINAL : public hal::DFT1D
3225	{
3226	public:
3227	OcvDftOptions opt;
3228	int _factors[`34`];
3229	AutoBuffer<uchar> wave_buf;
3230	AutoBuffer<int> itab_buf;
3231	#ifdef USE_IPP_DFT
3232	AutoBuffer<uchar> ippbuf;
3233	AutoBuffer<uchar> ippworkbuf;
3234	#endif
3235
3236	public:
3237	OcvDftBasicImpl()
3238	{
3239	opt.factors = _factors;
3240	}
3241	void init(int len, int count, int depth, int flags, bool *needBuffer)
3242	{
3243	int prev_len = opt.n;
3244
3245	int stage = (flags & CV_HAL_DFT_STAGE_COLS) != `0` ? `1` : `0`;
3246	int complex_elem_size = depth == CV_32F ? sizeof(Complex<float>) : sizeof(Complex<double>);
3247	opt.isInverse = (flags & CV_HAL_DFT_INVERSE) != `0`;
3248	bool real_transform = (flags & CV_HAL_DFT_REAL_OUTPUT) != `0`;
3249	opt.isComplex = (stage == `0`) && (flags & CV_HAL_DFT_COMPLEX_OUTPUT) != `0`;
3250	bool needAnotherStage = (flags & CV_HAL_DFT_TWO_STAGE) != `0`;
3251
3252	opt.scale = `1`;
3253	opt.tab_size = len;
3254	opt.n = len;
3255
3256	opt.useIpp = false;
3257	#ifdef USE_IPP_DFT
3258	opt.ipp_spec = `0`;
3259	opt.ipp_work = `0`;
3260
3261	if( CV_IPP_CHECK_COND && (opt.ncount >= `64`) ) // use IPP DFT if available*
3262	{
3263	int ipp_norm_flag = (flags & CV_HAL_DFT_SCALE) == `0` ? `8` : opt.isInverse ? `2` : `1`;
3264	int specsize=`0`, initsize=`0`, worksize=`0`;
3265	IppDFTGetSizeFunc getSizeFunc = `0`;
3266	IppDFTInitFunc initFunc = `0`;
3267
3268	if( real_transform && stage == `0` )
3269	{
3270	if( depth == CV_32F )
3271	{
3272	getSizeFunc = ippsDFTGetSize_R_32f;
3273	initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
3274	}
3275	else
3276	{
3277	getSizeFunc = ippsDFTGetSize_R_64f;
3278	initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
3279	}
3280	}
3281	else
3282	{
3283	if( depth == CV_32F )
3284	{
3285	getSizeFunc = ippsDFTGetSize_C_32fc;
3286	initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
3287	}
3288	else
3289	{
3290	getSizeFunc = ippsDFTGetSize_C_64fc;
3291	initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
3292	}
3293	}
3294	if( getSizeFunc(opt.n, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= `0` )
3295	{
3296	ippbuf.allocate(size: specsize + initsize + `64`);
3297	opt.ipp_spec = alignPtr(ptr: &ippbuf [`0`], n: `32`);
3298	ippworkbuf.allocate(size: worksize + `32`);
3299	opt.ipp_work = alignPtr(ptr: &ippworkbuf [`0`], n: `32`);
3300	uchar* initbuf = alignPtr(ptr: (uchar*)opt.ipp_spec + specsize, n: `32`);
3301	if( initFunc(opt.n, ipp_norm_flag, ippAlgHintNone, opt.ipp_spec, initbuf) >= `0` )
3302	opt.useIpp = true;
3303	}
3304	else
3305	setIppErrorStatus();
3306	}
3307	#endif
3308
3309	if (!opt.useIpp)
3310	{
3311	if (len != prev_len)
3312	{
3313	opt.nf = DFTFactorize( n: opt.n, factors: opt.factors );
3314	}
3315	bool inplace_transform = opt.factors[`0`] == opt.factors[opt.nf-`1`];
3316	if (len != prev_len \|\| (!inplace_transform && opt.isInverse && real_transform))
3317	{
3318	wave_buf.allocate(size: opt.n*complex_elem_size);
3319	opt.wave = wave_buf.data();
3320	itab_buf.allocate(size: opt.n);
3321	opt.itab = itab_buf.data();
3322	DFTInit( n0: opt.n, nf: opt.nf, factors: opt.factors, itab: opt.itab, elem_size: complex_elem_size,
3323	wave: opt.wave, inv_itab: stage == `0` && opt.isInverse && real_transform );
3324	}
3325	// otherwise reuse the tables calculated on the previous stage
3326	if (needBuffer)
3327	{
3328	if( (stage == `0` && ((*needBuffer && !inplace_transform) \|\| (real_transform && (len & `1`)))) \|\|
3329	(stage == `1` && !inplace_transform) )
3330	{
3331	needBuffer = true*;
3332	}
3333	}
3334	}
3335	else
3336	{
3337	if (needBuffer)
3338	{
3339	needBuffer = false*;
3340	}
3341	}
3342
3343	{
3344	static DFTFunc dft_tbl[`6`] =
3345	{
3346	(DFTFunc)DFT_32f,
3347	(DFTFunc)RealDFT_32f,
3348	(DFTFunc)CCSIDFT_32f,
3349	(DFTFunc)DFT_64f,
3350	(DFTFunc)RealDFT_64f,
3351	(DFTFunc)CCSIDFT_64f
3352	};
3353	int idx = `0`;
3354	if (stage == `0`)
3355	{
3356	if (real_transform)
3357	{
3358	if (!opt.isInverse)
3359	idx = `1`;
3360	else
3361	idx = `2`;
3362	}
3363	}
3364	if (depth == CV_64F)
3365	idx += `3`;
3366
3367	opt.dft_func = dft_tbl[idx];
3368	}
3369
3370	if(!needAnotherStage && (flags & CV_HAL_DFT_SCALE) != `0`)
3371	{
3372	int rowCount = count;
3373	if (stage == `0` && (flags & CV_HAL_DFT_ROWS) != `0`)
3374	rowCount = `1`;
3375	opt.scale = `1.`/(len * rowCount);
3376	}
3377	}
3378
3379	void apply(const uchar src, uchar dst) CV_OVERRIDE
3380	{
3381	opt.dft_func(opt, src, dst);
3382	}
3383
3384	void free() {}
3385	};
3386
3387	struct ReplacementDFT1D : public hal::DFT1D
3388	{
3389	cvhalDFT *context;
3390	bool isInitialized;
3391
3392	ReplacementDFT1D() : context(`0`), isInitialized(false) {}
3393	bool init(int len, int count, int depth, int flags, bool *needBuffer)
3394	{
3395	int res = cv_hal_dftInit1D(context: &context, len, count, depth, flags, needBuffer);
3396	isInitialized = (res == CV_HAL_ERROR_OK);
3397	return isInitialized;
3398	}
3399	void apply(const uchar src, uchar dst) CV_OVERRIDE
3400	{
3401	if (isInitialized)
3402	{
3403	CALL_HAL(dft1D, cv_hal_dft1D, context, src, dst);
3404	}
3405	}
3406	~ReplacementDFT1D()
3407	{
3408	if (isInitialized)
3409	{
3410	CALL_HAL(dftFree1D, cv_hal_dftFree1D, context);
3411	}
3412	}
3413	};
3414
3415	struct ReplacementDFT2D : public hal::DFT2D
3416	{
3417	cvhalDFT *context;
3418	bool isInitialized;
3419
3420	ReplacementDFT2D() : context(`0`), isInitialized(false) {}
3421	bool init(int width, int height, int depth,
3422	int src_channels, int dst_channels,
3423	int flags, int nonzero_rows)
3424	{
3425	int res = cv_hal_dftInit2D(context: &context, width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
3426	isInitialized = (res == CV_HAL_ERROR_OK);
3427	return isInitialized;
3428	}
3429	void apply(const uchar src, size_t src_step, uchar dst, size_t dst_step) CV_OVERRIDE
3430	{
3431	if (isInitialized)
3432	{
3433	CALL_HAL(dft2D, cv_hal_dft2D, context, src, src_step, dst, dst_step);
3434	}
3435	}
3436	~ReplacementDFT2D()
3437	{
3438	if (isInitialized)
3439	{
3440	CALL_HAL(dftFree2D, cv_hal_dftFree1D, context);
3441	}
3442	}
3443	};
3444
3445	namespace hal {
3446
3447	//================== 1D ======================
3448
3449	Ptr<DFT1D> DFT1D::create(int len, int count, int depth, int flags, bool *needBuffer)
3450	{
3451	{
3452	ReplacementDFT1D impl = new* ReplacementDFT1D ();
3453	if (impl->init(len, count, depth, flags, needBuffer))
3454	{
3455	return Ptr<DFT1D>(impl);
3456	}
3457	delete impl;
3458	}
3459	{
3460	OcvDftBasicImpl impl = new* OcvDftBasicImpl ();
3461	impl->init(len, count, depth, flags, needBuffer);
3462	return Ptr<DFT1D>(impl);
3463	}
3464	}
3465
3466	//================== 2D ======================
3467
3468	Ptr<DFT2D> DFT2D::create(int width, int height, int depth,
3469	int src_channels, int dst_channels,
3470	int flags, int nonzero_rows)
3471	{
3472	{
3473	ReplacementDFT2D impl = new* ReplacementDFT2D ();
3474	if (impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows))
3475	{
3476	return Ptr<DFT2D>(impl);
3477	}
3478	delete impl;
3479	}
3480	{
3481	if(width == `1` && nonzero_rows > `0` )
3482	{
3483	CV_Error( cv::Error::StsNotImplemented,
3484	"This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
3485	"For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
3486	}
3487	OcvDftImpl impl = new* OcvDftImpl ();
3488	impl->init(width: width, height: height, depth: depth, src_channels: src_channels, dst_channels: dst_channels, flags, nonzero_rows: nonzero_rows);
3489	return Ptr<DFT2D>(impl);
3490	}
3491	}
3492
3493	} // cv::hal::
3494	} // cv::
3495
3496
3497	void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
3498	{
3499	CV_INSTRUMENT_REGION();
3500
3501	#ifdef HAVE_CLAMDFFT
3502	CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
3503	_dst.isUMat() && _src0.dims() <= `2` && nonzero_rows == `0`,
3504	ocl_dft_amdfft(_src0, _dst, flags))
3505	#endif
3506
3507	#ifdef HAVE_OPENCL
3508	CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= `2`,
3509	ocl_dft(src: _src0, _dst, flags, nonzero_rows))
3510	#endif
3511
3512	Mat src0 = _src0.getMat(), src = src0;
3513	bool inv = (flags & DFT_INVERSE) != `0`;
3514	int type = src.type();
3515	int depth = src.depth();
3516
3517	CV_Assert( type == CV_32FC1 \|\| type == CV_32FC2 \|\| type == CV_64FC1 \|\| type == CV_64FC2 );
3518
3519	// Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
3520	CV_Assert( !((flags & DFT_COMPLEX_INPUT) && src.channels() != `2`) );
3521
3522	if( !inv && src.channels() == `1` && (flags & DFT_COMPLEX_OUTPUT) )
3523	_dst.create( sz: src.size (), CV_MAKETYPE(depth, `2`) );
3524	else if( inv && src.channels() == `2` && (flags & DFT_REAL_OUTPUT) )
3525	_dst.create( sz: src.size (), type: depth );
3526	else
3527	_dst.create( sz: src.size (), type );
3528
3529	Mat dst = _dst.getMat();
3530
3531	int f = `0`;
3532	if (src.isContinuous() && dst.isContinuous())
3533	f \|= CV_HAL_DFT_IS_CONTINUOUS;
3534	if (inv)
3535	f \|= CV_HAL_DFT_INVERSE;
3536	if (flags & DFT_ROWS)
3537	f \|= CV_HAL_DFT_ROWS;
3538	if (flags & DFT_SCALE)
3539	f \|= CV_HAL_DFT_SCALE;
3540	if (src.data == dst.data)
3541	f \|= CV_HAL_DFT_IS_INPLACE;
3542	Ptr<hal::DFT2D> c = hal::DFT2D::create(width: src.cols, height: src.rows, depth, src_channels: src.channels(), dst_channels: dst.channels(), flags: f, nonzero_rows);
3543	c ->apply(src_data: src.data, src_step: src.step, dst_data: dst.data, dst_step: dst.step);
3544	}
3545
3546
3547	void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
3548	{
3549	CV_INSTRUMENT_REGION();
3550
3551	dft( src0: src, dst: dst, flags: flags \| DFT_INVERSE, nonzero_rows );
3552	}
3553
3554	#ifdef HAVE_OPENCL
3555
3556	namespace cv {
3557
3558	static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
3559	OutputArray _dst, int flags, bool conjB )
3560	{
3561	int atype = _srcA.type(), btype = _srcB.type(),
3562	rowsPerWI = ocl::Device::getDefault().isIntel() ? `4` : `1`;
3563	Size asize = _srcA.size(), bsize = _srcB.size();
3564	CV_Assert(asize == bsize);
3565
3566	if ( !(atype == CV_32FC2 && btype == CV_32FC2) \|\| flags != `0` )
3567	return false;
3568
3569	UMat A = _srcA.getUMat(), B = _srcB.getUMat();
3570	CV_Assert(A.size() == B.size());
3571
3572	_dst.create(sz: A.size (), type: atype);
3573	UMat dst = _dst.getUMat();
3574
3575	ocl::Kernel k("mulAndScaleSpectrums",
3576	ocl::core::mulspectrums_oclsrc,
3577	format("%s", conjB ? "-D CONJ" : ""));
3578	if (k.empty())
3579	return false;
3580
3581	k.args(kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: A), kernel_args: ocl::KernelArg::ReadOnlyNoSize(m: B),
3582	kernel_args: ocl::KernelArg::WriteOnly(m: dst), kernel_args: rowsPerWI);
3583
3584	size_t globalsize[`2`] = { (size_t)asize.width, ((size_t)asize.height + rowsPerWI - `1`) / rowsPerWI };
3585	return k.run(dims: `2`, globalsize, NULL, sync: false);
3586	}
3587
3588	}
3589
3590	#endif
3591
3592	namespace {
3593
3594	#define VAL(buf, elem) (((T)((char)data ## buf + (step ## buf * (elem))))[0])
3595	#define MUL_SPECTRUMS_COL(A, B, C) \
3596	VAL(C, 0) = VAL(A, 0) * VAL(B, 0); \
3597	for (size_t j = 1; j <= rows - 2; j += 2) \
3598	{ \
3599	double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3600	double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3601	if (conjB) b_im = -b_im; \
3602	double c_re = a_re * b_re - a_im * b_im; \
3603	double c_im = a_re * b_im + a_im * b_re; \
3604	VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3605	} \
3606	if ((rows & 1) == 0) \
3607	VAL(C, rows-1) = VAL(A, rows-1) * VAL(B, rows-1)
3608
3609	template <typename T, bool conjB> static inline
3610	void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3611	{
3612	MUL_SPECTRUMS_COL(A, B, C);
3613	}
3614
3615	template <typename T, bool conjB> static inline
3616	void mulSpectrums_processCol_inplaceA(const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
3617	{
3618	MUL_SPECTRUMS_COL(AC, B, AC);
3619	}
3620	template <typename T, bool conjB, bool inplaceA> static inline
3621	void mulSpectrums_processCol(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3622	{
3623	if (inplaceA)
3624	mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
3625	else
3626	mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3627	}
3628	#undef MUL_SPECTRUMS_COL
3629	#undef VAL
3630
3631	template <typename T, bool conjB, bool inplaceA> static inline
3632	void mulSpectrums_processCols(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
3633	{
3634	mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3635	if ((cols & `1`) == `0`)
3636	{
3637	mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - `1`, dataB + cols - `1`, dataC + cols - `1`, stepA, stepB, stepC, rows);
3638	}
3639	}
3640
3641	#define VAL(buf, elem) (data ## buf[(elem)])
3642	#define MUL_SPECTRUMS_ROW(A, B, C) \
3643	for (size_t j = j0; j < j1; j += 2) \
3644	{ \
3645	double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3646	double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3647	if (conjB) b_im = -b_im; \
3648	double c_re = a_re * b_re - a_im * b_im; \
3649	double c_im = a_re * b_im + a_im * b_re; \
3650	VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3651	}
3652	template <typename T, bool conjB> static inline
3653	void mulSpectrums_processRow_noinplace(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3654	{
3655	MUL_SPECTRUMS_ROW(A, B, C);
3656	}
3657	template <typename T, bool conjB> static inline
3658	void mulSpectrums_processRow_inplaceA(const T* dataB, T* dataAC, size_t j0, size_t j1)
3659	{
3660	MUL_SPECTRUMS_ROW(AC, B, AC);
3661	}
3662	template <typename T, bool conjB, bool inplaceA> static inline
3663	void mulSpectrums_processRow(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3664	{
3665	if (inplaceA)
3666	mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
3667	else
3668	mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
3669	}
3670	#undef MUL_SPECTRUMS_ROW
3671	#undef VAL
3672
3673	template <typename T, bool conjB, bool inplaceA> static inline
3674	void mulSpectrums_processRows(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
3675	{
3676	while (rows-- > `0`)
3677	{
3678	if (is_1d_CN1)
3679	dataC[`0`] = dataA[`0`]*dataB[`0`];
3680	mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
3681	if (is_1d_CN1 && (cols & `1`) == `0`)
3682	dataC[j1] = dataA[j1]*dataB[j1];
3683
3684	dataA = (const T)(((char**)dataA) + stepA);
3685	dataB = (const T)(((char**)dataB) + stepB);
3686	dataC = (T)(((char**)dataC) + stepC);
3687	}
3688	}
3689
3690
3691	template <typename T, bool conjB, bool inplaceA> static inline
3692	void mulSpectrums_Impl_(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3693	{
3694	if (!is_1d && isCN1)
3695	{
3696	mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
3697	}
3698	mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
3699	}
3700	template <typename T, bool conjB> static inline
3701	void mulSpectrums_Impl(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3702	{
3703	if (dataA == dataC)
3704	mulSpectrums_Impl_<T, conjB, true>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3705	else
3706	mulSpectrums_Impl_<T, conjB, false>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3707	}
3708
3709	} // namespace
3710
3711	void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
3712	OutputArray _dst, int flags, bool conjB )
3713	{
3714	CV_INSTRUMENT_REGION();
3715
3716	CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= `2` && _srcB.dims() <= `2`,
3717	ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
3718
3719	Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
3720	int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
3721	size_t rows = srcA.rows, cols = srcA.cols;
3722
3723	CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
3724	CV_Assert( type == CV_32FC1 \|\| type == CV_32FC2 \|\| type == CV_64FC1 \|\| type == CV_64FC2 );
3725
3726	_dst.create( rows: srcA.rows, cols: srcA.cols, type );
3727	Mat dst = _dst.getMat();
3728
3729	// correct inplace support
3730	// Case 'dst.data == srcA.data' is handled by implementation,
3731	// because it is used frequently (filter2D, matchTemplate)
3732	if (dst.data == srcB.data)
3733	srcB = srcB.clone(); // workaround for B only
3734
3735	bool is_1d = (flags & DFT_ROWS)
3736	\|\| (rows == `1`)
3737	\|\| (cols == `1` && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous());
3738
3739	if( is_1d && !(flags & DFT_ROWS) )
3740	cols = cols + rows - `1`, rows = `1`;
3741
3742	bool isCN1 = cn == `1`;
3743	size_t j0 = isCN1 ? `1` : `0`;
3744	size_t j1 = cols*cn - (((cols & `1`) == `0` && cn == `1`) ? `1` : `0`);
3745
3746	if (depth == CV_32F)
3747	{
3748	const float* dataA = srcA.ptr<float>();
3749	const float* dataB = srcB.ptr<float>();
3750	float* dataC = dst.ptr<float>();
3751	if (!conjB)
3752	mulSpectrums_Impl<float, false>(dataA, dataB, dataC, stepA: srcA.step, stepB: srcB.step, stepC: dst.step, rows, cols, j0, j1, is_1d, isCN1);
3753	else
3754	mulSpectrums_Impl<float, true>(dataA, dataB, dataC, stepA: srcA.step, stepB: srcB.step, stepC: dst.step, rows, cols, j0, j1, is_1d, isCN1);
3755	}
3756	else
3757	{
3758	const double* dataA = srcA.ptr<double>();
3759	const double* dataB = srcB.ptr<double>();
3760	double* dataC = dst.ptr<double>();
3761	if (!conjB)
3762	mulSpectrums_Impl<double, false>(dataA, dataB, dataC, stepA: srcA.step, stepB: srcB.step, stepC: dst.step, rows, cols, j0, j1, is_1d, isCN1);
3763	else
3764	mulSpectrums_Impl<double, true>(dataA, dataB, dataC, stepA: srcA.step, stepB: srcB.step, stepC: dst.step, rows, cols, j0, j1, is_1d, isCN1);
3765	}
3766	}
3767
3768
3769	/**************************************************************************************\
3770	Discrete Cosine Transform
3771	\**************************************************************************************/
3772
3773	namespace cv
3774	{
3775
3776	/ DCT is calculated using DFT, as described here:*
3777	http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
3778	*/
3779	template<typename T> static void
3780	DCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3781	const Complex<T>* dct_wave )
3782	{
3783	static const T sin_45 = (T)`0.70710678118654752440084436210485`;
3784
3785	int n = c.n;
3786	int j, n2 = n >> `1`;
3787
3788	src_step /= sizeof(src[`0`]);
3789	dst_step /= sizeof(dst[`0`]);
3790	T* dst1 = dst + (n-`1`)*dst_step;
3791
3792	if( n == `1` )
3793	{
3794	dst[`0`] = src[`0`];
3795	return;
3796	}
3797
3798	for( j = `0`; j < n2; j++, src += src_step*`2` )
3799	{
3800	dft_src[j] = src[`0`];
3801	dft_src[n-j-`1`] = src[src_step];
3802	}
3803
3804	RealDFT(c, dft_src, dft_dst);
3805	src = dft_dst;
3806
3807	dst[`0`] = (T)(src[`0`]dct_wave->resin_45);
3808	dst += dst_step;
3809	for( j = `1`, dct_wave++; j < n2; j++, dct_wave++,
3810	dst += dst_step, dst1 -= dst_step )
3811	{
3812	T t0 = dct_wave->resrc[j`2`-`1`] - dct_wave->imsrc[j`2`];
3813	T t1 = -dct_wave->imsrc[j`2`-`1`] - dct_wave->resrc[j`2`];
3814	dst[`0`] = t0;
3815	dst1[`0`] = t1;
3816	}
3817
3818	dst[`0`] = src[n-`1`]*dct_wave->re;
3819	}
3820
3821
3822	template<typename T> static void
3823	IDCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3824	const Complex<T>* dct_wave)
3825	{
3826	static const T sin_45 = (T)`0.70710678118654752440084436210485`;
3827	int n = c.n;
3828	int j, n2 = n >> `1`;
3829
3830	src_step /= sizeof(src[`0`]);
3831	dst_step /= sizeof(dst[`0`]);
3832	const T* src1 = src + (n-`1`)*src_step;
3833
3834	if( n == `1` )
3835	{
3836	dst[`0`] = src[`0`];
3837	return;
3838	}
3839
3840	dft_src[`0`] = (T)(src[`0`]`2`dct_wave->re*sin_45);
3841	src += src_step;
3842	for( j = `1`, dct_wave++; j < n2; j++, dct_wave++,
3843	src += src_step, src1 -= src_step )
3844	{
3845	T t0 = dct_wave->resrc[`0`] - dct_wave->imsrc1[`0`];
3846	T t1 = -dct_wave->imsrc[`0`] - dct_wave->resrc1[`0`];
3847	dft_src[j*`2`-`1`] = t0;
3848	dft_src[j*`2`] = t1;
3849	}
3850
3851	dft_src[n-`1`] = (T)(src[`0`]`2`dct_wave->re);
3852	CCSIDFT(c, dft_src, dft_dst);
3853
3854	for( j = `0`; j < n2; j++, dst += dst_step*`2` )
3855	{
3856	dst[`0`] = dft_dst[j];
3857	dst[dst_step] = dft_dst[n-j-`1`];
3858	}
3859	}
3860
3861
3862	static void
3863	DCTInit( int n, int elem_size, void* _wave, int inv )
3864	{
3865	static const double DctScale[] =
3866	{
3867	`0.707106781186547570`, `0.500000000000000000`, `0.353553390593273790`,
3868	`0.250000000000000000`, `0.176776695296636890`, `0.125000000000000000`,
3869	`0.088388347648318447`, `0.062500000000000000`, `0.044194173824159223`,
3870	`0.031250000000000000`, `0.022097086912079612`, `0.015625000000000000`,
3871	`0.011048543456039806`, `0.007812500000000000`, `0.005524271728019903`,
3872	`0.003906250000000000`, `0.002762135864009952`, `0.001953125000000000`,
3873	`0.001381067932004976`, `0.000976562500000000`, `0.000690533966002488`,
3874	`0.000488281250000000`, `0.000345266983001244`, `0.000244140625000000`,
3875	`0.000172633491500622`, `0.000122070312500000`, `0.000086316745750311`,
3876	`0.000061035156250000`, `0.000043158372875155`, `0.000030517578125000`
3877	};
3878
3879	int i;
3880	Complex<double> w, w1;
3881	double t, scale;
3882
3883	if( n == `1` )
3884	return;
3885
3886	CV_Assert( (n&`1`) == `0` );
3887
3888	if( (n & (n - `1`)) == `0` )
3889	{
3890	int m;
3891	for( m = `0`; (unsigned)(`1` << m) < (unsigned)n; m++ )
3892	;
3893	scale = (!inv ? `2` : `1`)*DctScale[m];
3894	w1.re = DFTTab[m+`2`][`0`];
3895	w1.im = -DFTTab[m+`2`][`1`];
3896	}
3897	else
3898	{
3899	t = `1.`/(`2`*n);
3900	scale = (!inv ? `2` : `1`)*std::sqrt(x: t);
3901	w1.im = sin(x: -CV_PI*t);
3902	w1.re = std::sqrt(x: `1.` - w1.im*w1.im);
3903	}
3904	n >>= `1`;
3905
3906	if( elem_size == sizeof(Complex<double>) )
3907	{
3908	Complex<double>* wave = (Complex<double>*)_wave;
3909
3910	w.re = scale;
3911	w.im = `0.`;
3912
3913	for( i = `0`; i <= n; i++ )
3914	{
3915	wave[i] = w;
3916	t = w.rew1.re - w.imw1.im;
3917	w.im = w.rew1.im + w.imw1.re;
3918	w.re = t;
3919	}
3920	}
3921	else
3922	{
3923	Complex<float>* wave = (Complex<float>*)_wave;
3924	CV_Assert( elem_size == sizeof(Complex<float>) );
3925
3926	w.re = (float)scale;
3927	w.im = `0.f`;
3928
3929	for( i = `0`; i <= n; i++ )
3930	{
3931	wave[i].re = (float)w.re;
3932	wave[i].im = (float)w.im;
3933	t = w.rew1.re - w.imw1.im;
3934	w.im = w.rew1.im + w.imw1.re;
3935	w.re = t;
3936	}
3937	}
3938	}
3939
3940
3941	typedef void (DCTFunc)(const* OcvDftOptions & c, const void* src, size_t src_step, void* dft_src,
3942	void* dft_dst, void* dst, size_t dst_step, const void* dct_wave);
3943
3944	static void DCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3945	float* dst, size_t dst_step, const Complexf* dct_wave)
3946	{
3947	DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3948	}
3949
3950	static void IDCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3951	float* dst, size_t dst_step, const Complexf* dct_wave)
3952	{
3953	IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3954	}
3955
3956	static void DCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3957	double* dst, size_t dst_step, const Complexd* dct_wave)
3958	{
3959	DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3960	}
3961
3962	static void IDCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3963	double* dst, size_t dst_step, const Complexd* dct_wave)
3964	{
3965	IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3966	}
3967
3968	}
3969
3970	#ifdef HAVE_IPP
3971	namespace cv
3972	{
3973
3974	#if IPP_VERSION_X100 >= 900
3975	typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer);
3976	typedef IppStatus (CV_STDCALL * ippiDCTInit)(void* pDCTSpec, IppiSize roiSize, Ipp8u* pMemInit );
3977	typedef IppStatus (CV_STDCALL * ippiDCTGetSize)(IppiSize roiSize, int* pSizeSpec, int* pSizeInit, int* pSizeBuf);
3978	#elif IPP_VERSION_X100 >= 700
3979	typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f, int, Ipp32f, int, const void, Ipp8u);
3980	typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
3981	typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
3982	typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void, int**);
3983	#endif
3984
3985	class DctIPPLoop_Invoker : public ParallelLoopBody
3986	{
3987	public:
3988	DctIPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width, bool _inv, bool *_ok) :
3989	ParallelLoopBody (), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok)
3990	{
3991	ok = true*;
3992	}
3993
3994	virtual void operator()(const Range& range) const CV_OVERRIDE
3995	{
3996	if(ok == false*)
3997	return;
3998
3999	#if IPP_VERSION_X100 >= 900
4000	IppiSize srcRoiSize = {.width: width, .height: `1`};
4001
4002	int specSize = `0`;
4003	int initSize = `0`;
4004	int bufferSize = `0`;
4005
4006	Ipp8u* pDCTSpec = NULL;
4007	Ipp8u* pBuffer = NULL;
4008	Ipp8u* pInitBuf = NULL;
4009
4010	#define IPP_RETURN \
4011	if(pDCTSpec) \
4012	ippFree(pDCTSpec); \
4013	if(pBuffer) \
4014	ippFree(pBuffer); \
4015	if(pInitBuf) \
4016	ippFree(pInitBuf); \
4017	return;
4018
4019	ippiDCTFunc ippiDCT_32f_C1R = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4020	ippiDCTInit ippDctInit = inv ? (ippiDCTInit)ippiDCTInvInit_32f : (ippiDCTInit)ippiDCTFwdInit_32f;
4021	ippiDCTGetSize ippDctGetSize = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
4022
4023	if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < `0`)
4024	{
4025	ok = false*;
4026	return;
4027	}
4028
4029	pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
4030	if(!pDCTSpec && specSize)
4031	{
4032	ok = false*;
4033	return;
4034	}
4035
4036	pBuffer = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
4037	if(!pBuffer && bufferSize)
4038	{
4039	ok = false*;
4040	IPP_RETURN
4041	}
4042	pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
4043	if(!pInitBuf && initSize)
4044	{
4045	ok = false*;
4046	IPP_RETURN
4047	}
4048
4049	if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < `0`)
4050	{
4051	ok = false*;
4052	IPP_RETURN
4053	}
4054
4055	for(int i = range.start; i < range.end; ++i)
4056	{
4057	if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float)(src + src_step i), static_cast<int>(src_step), (float)(dst + dst_step i), static_cast<int>(dst_step), pDCTSpec, pBuffer) < `0`)
4058	{
4059	ok = false*;
4060	IPP_RETURN
4061	}
4062	}
4063	IPP_RETURN
4064	#undef IPP_RETURN
4065	#elif IPP_VERSION_X100 >= 700
4066	void* pDCTSpec;
4067	AutoBuffer<uchar> buf;
4068	uchar* pBuffer = `0`;
4069	int bufSize=`0`;
4070
4071	IppiSize srcRoiSize = {width, `1`};
4072
4073	CV_SUPPRESS_DEPRECATED_START
4074
4075	ippiDCTFunc ippDctFun = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4076	ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4077	ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
4078	ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4079
4080	if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=`0` && ippGetBufSize(pDCTSpec, &bufSize)>=`0`)
4081	{
4082	buf.allocate( bufSize );
4083	pBuffer = (uchar*)buf;
4084
4085	for( int i = range.start; i < range.end; ++i)
4086	{
4087	if(ippDctFun((float)(src + src_step i), static_cast<int>(src_step), (float)(dst + dst_step i), static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer) < `0`)
4088	{
4089	ok = false*;
4090	break;
4091	}
4092	}
4093	}
4094	else
4095	ok = false*;
4096
4097	if (pDCTSpec)
4098	ippFree(pDCTSpec);
4099
4100	CV_SUPPRESS_DEPRECATED_END
4101	#else
4102	CV_UNUSED(range);
4103	ok = false*;
4104	#endif
4105	}
4106
4107	private:
4108	const uchar * src;
4109	size_t src_step;
4110	uchar * dst;
4111	size_t dst_step;
4112	int width;
4113	bool inv;
4114	bool *ok;
4115	};
4116
4117	static bool DctIPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv)
4118	{
4119	bool ok;
4120	parallel_for_(range: Range (`0`, height), body: DctIPPLoop_Invoker (src, src_step, dst, dst_step, width, inv, &ok), nstripes: height/(double)(`1`<<`4`) );
4121	return ok;
4122	}
4123
4124	static bool ippi_DCT_32f(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, bool row)
4125	{
4126	CV_INSTRUMENT_REGION_IPP();
4127
4128	if(row)
4129	return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv);
4130	else
4131	{
4132	#if IPP_VERSION_X100 >= 900
4133	IppiSize srcRoiSize = {.width: width, .height: height};
4134
4135	int specSize = `0`;
4136	int initSize = `0`;
4137	int bufferSize = `0`;
4138
4139	Ipp8u* pDCTSpec = NULL;
4140	Ipp8u* pBuffer = NULL;
4141	Ipp8u* pInitBuf = NULL;
4142
4143	#define IPP_RELEASE \
4144	if(pDCTSpec) \
4145	ippFree(pDCTSpec); \
4146	if(pBuffer) \
4147	ippFree(pBuffer); \
4148	if(pInitBuf) \
4149	ippFree(pInitBuf); \
4150
4151	ippiDCTFunc ippiDCT_32f_C1R = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4152	ippiDCTInit ippDctInit = inv ? (ippiDCTInit)ippiDCTInvInit_32f : (ippiDCTInit)ippiDCTFwdInit_32f;
4153	ippiDCTGetSize ippDctGetSize = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
4154
4155	if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < `0`)
4156	return false;
4157
4158	pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
4159	if(!pDCTSpec && specSize)
4160	return false;
4161
4162	pBuffer = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
4163	if(!pBuffer && bufferSize)
4164	{
4165	IPP_RELEASE
4166	return false;
4167	}
4168	pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
4169	if(!pInitBuf && initSize)
4170	{
4171	IPP_RELEASE
4172	return false;
4173	}
4174
4175	if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < `0`)
4176	{
4177	IPP_RELEASE
4178	return false;
4179	}
4180
4181	if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float)src, static_cast<int>(src_step), (float)dst, static_cast*<int*>(dst_step), pDCTSpec, pBuffer) < `0`)
4182	{
4183	IPP_RELEASE
4184	return false;
4185	}
4186
4187	IPP_RELEASE
4188	return true;
4189	#undef IPP_RELEASE
4190	#elif IPP_VERSION_X100 >= 700
4191	IppStatus status;
4192	void* pDCTSpec;
4193	AutoBuffer<uchar> buf;
4194	uchar* pBuffer = `0`;
4195	int bufSize=`0`;
4196
4197	IppiSize srcRoiSize = {width, height};
4198
4199	CV_SUPPRESS_DEPRECATED_START
4200
4201	ippiDCTFunc ippDctFun = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4202	ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4203	ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
4204	ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4205
4206	status = ippStsErr;
4207
4208	if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=`0` && ippGetBufSize(pDCTSpec, &bufSize)>=`0`)
4209	{
4210	buf.allocate( bufSize );
4211	pBuffer = (uchar*)buf;
4212
4213	status = ippDctFun((float)src, static_cast<int>(src_step), (float)dst, static_cast*<int>(dst_step), pDCTSpec, (Ipp8u)pBuffer);
4214	}
4215
4216	if (pDCTSpec)
4217	ippFree(pDCTSpec);
4218
4219	CV_SUPPRESS_DEPRECATED_END
4220
4221	return status >= `0`;
4222	#else
4223	CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(inv); CV_UNUSED(row);
4224	return false;
4225	#endif
4226	}
4227	}
4228	}
4229	#endif
4230
4231	namespace cv {
4232
4233	class OcvDctImpl CV_FINAL : public hal::DCT2D
4234	{
4235	public:
4236	OcvDftOptions opt;
4237
4238	int _factors[`34`];
4239	AutoBuffer<uint> wave_buf;
4240	AutoBuffer<int> itab_buf;
4241
4242	DCTFunc dct_func;
4243	bool isRowTransform;
4244	bool isInverse;
4245	bool isContinuous;
4246	int start_stage;
4247	int end_stage;
4248	int width;
4249	int height;
4250	int depth;
4251
4252	void init(int _width, int _height, int _depth, int flags)
4253	{
4254	width = _width;
4255	height = _height;
4256	depth = _depth;
4257	isInverse = (flags & CV_HAL_DFT_INVERSE) != `0`;
4258	isRowTransform = (flags & CV_HAL_DFT_ROWS) != `0`;
4259	isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != `0`;
4260	static DCTFunc dct_tbl[`4`] =
4261	{
4262	(DCTFunc)DCT_32f,
4263	(DCTFunc)IDCT_32f,
4264	(DCTFunc)DCT_64f,
4265	(DCTFunc)IDCT_64f
4266	};
4267	dct_func = dct_tbl[(int)isInverse + (depth == CV_64F)*`2`];
4268	opt.nf = `0`;
4269	opt.isComplex = false;
4270	opt.isInverse = false;
4271	opt.noPermute = false;
4272	opt.scale = `1.`;
4273	opt.factors = _factors;
4274
4275	if (isRowTransform \|\| height == `1` \|\| (width == `1` && isContinuous))
4276	{
4277	start_stage = end_stage = `0`;
4278	}
4279	else
4280	{
4281	start_stage = (width == `1`);
4282	end_stage = `1`;
4283	}
4284	}
4285	void apply(const uchar src, size_t src_step, uchar dst, size_t dst_step) CV_OVERRIDE
4286	{
4287	CV_IPP_RUN(IPP_VERSION_X100 >= `700` && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform))
4288
4289	AutoBuffer<uchar> dct_wave;
4290	AutoBuffer<uchar> src_buf, dst_buf;
4291	uchar src_dft_buf = `0`, dst_dft_buf = `0`;
4292	int prev_len = `0`;
4293	int elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
4294	int complex_elem_size = elem_size*`2`;
4295
4296	for(int stage = start_stage ; stage <= end_stage; stage++ )
4297	{
4298	const uchar* sptr = src;
4299	uchar* dptr = dst;
4300	size_t sstep0, sstep1, dstep0, dstep1;
4301	int len, count;
4302
4303	if( stage == `0` )
4304	{
4305	len = width;
4306	count = height;
4307	if( len == `1` && !isRowTransform )
4308	{
4309	len = height;
4310	count = `1`;
4311	}
4312	sstep0 = src_step;
4313	dstep0 = dst_step;
4314	sstep1 = dstep1 = elem_size;
4315	}
4316	else
4317	{
4318	len = height;
4319	count = width;
4320	sstep1 = src_step;
4321	dstep1 = dst_step;
4322	sstep0 = dstep0 = elem_size;
4323	}
4324
4325	opt.n = len;
4326	opt.tab_size = len;
4327
4328	if( len != prev_len )
4329	{
4330	if( len > `1` && (len & `1`) )
4331	CV_Error( cv::Error::StsNotImplemented, "Odd-size DCT\'s are not implemented" );
4332
4333	opt.nf = DFTFactorize( n: len, factors: opt.factors );
4334	bool inplace_transform = opt.factors[`0`] == opt.factors[opt.nf-`1`];
4335
4336	wave_buf.allocate(size: len*complex_elem_size);
4337	opt.wave = wave_buf.data();
4338	itab_buf.allocate(size: len);
4339	opt.itab = itab_buf.data();
4340	DFTInit( n0: len, nf: opt.nf, factors: opt.factors, itab: opt.itab, elem_size: complex_elem_size, wave: opt.wave, inv_itab: isInverse );
4341
4342	dct_wave.allocate(size: (len/`2` + `1`)*complex_elem_size);
4343	src_buf.allocate(size: len*elem_size);
4344	src_dft_buf = src_buf.data();
4345	if(!inplace_transform)
4346	{
4347	dst_buf.allocate(size: len*elem_size);
4348	dst_dft_buf = dst_buf.data();
4349	}
4350	else
4351	{
4352	dst_dft_buf = src_buf.data();
4353	}
4354	DCTInit( n: len, elem_size: complex_elem_size, wave: dct_wave.data(), inv: isInverse);
4355	prev_len = len;
4356	}
4357	// otherwise reuse the tables calculated on the previous stage
4358	for(unsigned i = `0`; i < static_cast<unsigned>(count); i++ )
4359	{
4360	dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf,
4361	dptr + i*dstep0, dstep1, dct_wave.data());
4362	}
4363	src = dst;
4364	src_step = dst_step;
4365	}
4366	}
4367	};
4368
4369	struct ReplacementDCT2D : public hal::DCT2D
4370	{
4371	cvhalDFT *context;
4372	bool isInitialized;
4373
4374	ReplacementDCT2D() : context(`0`), isInitialized(false) {}
4375	bool init(int width, int height, int depth, int flags)
4376	{
4377	int res = cv_hal_dctInit2D(context: &context, width, height, depth, flags);
4378	isInitialized = (res == CV_HAL_ERROR_OK);
4379	return isInitialized;
4380	}
4381	void apply(const uchar src_data, size_t src_step, uchar dst_data, size_t dst_step) CV_OVERRIDE
4382	{
4383	if (isInitialized)
4384	{
4385	CALL_HAL(dct2D, cv_hal_dct2D, context, src_data, src_step, dst_data, dst_step);
4386	}
4387	}
4388	~ReplacementDCT2D()
4389	{
4390	if (isInitialized)
4391	{
4392	CALL_HAL(dctFree2D, cv_hal_dctFree2D, context);
4393	}
4394	}
4395	};
4396
4397	namespace hal {
4398
4399	Ptr<DCT2D> DCT2D::create(int width, int height, int depth, int flags)
4400	{
4401	{
4402	ReplacementDCT2D impl = new* ReplacementDCT2D ();
4403	if (impl->init(width, height, depth, flags))
4404	{
4405	return Ptr<DCT2D>(impl);
4406	}
4407	delete impl;
4408	}
4409	{
4410	OcvDctImpl impl = new* OcvDctImpl ();
4411	impl->init(width: width, height: height, depth: depth, flags);
4412	return Ptr<DCT2D>(impl);
4413	}
4414	}
4415
4416	} // cv::hal::
4417	} // cv::
4418
4419	void cv::dct( InputArray _src0, OutputArray _dst, int flags )
4420	{
4421	CV_INSTRUMENT_REGION();
4422
4423	Mat src0 = _src0.getMat(), src = src0;
4424	int type = src.type(), depth = src.depth();
4425
4426	CV_Assert( type == CV_32FC1 \|\| type == CV_64FC1 );
4427	_dst.create( rows: src.rows, cols: src.cols, type );
4428	Mat dst = _dst.getMat();
4429
4430	int f = `0`;
4431	if ((flags & DFT_ROWS) != `0`)
4432	f \|= CV_HAL_DFT_ROWS;
4433	if ((flags & DCT_INVERSE) != `0`)
4434	f \|= CV_HAL_DFT_INVERSE;
4435	if (src.isContinuous() && dst.isContinuous())
4436	f \|= CV_HAL_DFT_IS_CONTINUOUS;
4437
4438	Ptr<hal::DCT2D> c = hal::DCT2D::create(width: src.cols, height: src.rows, depth, flags: f);
4439	c ->apply(src_data: src.data, src_step: src.step, dst_data: dst.data, dst_step: dst.step);
4440	}
4441
4442
4443	void cv::idct( InputArray src, OutputArray dst, int flags )
4444	{
4445	CV_INSTRUMENT_REGION();
4446
4447	dct( src0: src, dst: dst, flags: flags \| DCT_INVERSE );
4448	}
4449
4450	namespace cv
4451	{
4452
4453	static const int optimalDFTSizeTab[] = {
4454	`1`, `2`, `3`, `4`, `5`, `6`, `8`, `9`, `10`, `12`, `15`, `16`, `18`, `20`, `24`, `25`, `27`, `30`, `32`, `36`, `40`, `45`, `48`,
4455	`50`, `54`, `60`, `64`, `72`, `75`, `80`, `81`, `90`, `96`, `100`, `108`, `120`, `125`, `128`, `135`, `144`, `150`, `160`,
4456	`162`, `180`, `192`, `200`, `216`, `225`, `240`, `243`, `250`, `256`, `270`, `288`, `300`, `320`, `324`, `360`, `375`,
4457	`384`, `400`, `405`, `432`, `450`, `480`, `486`, `500`, `512`, `540`, `576`, `600`, `625`, `640`, `648`, `675`, `720`,
4458	`729`, `750`, `768`, `800`, `810`, `864`, `900`, `960`, `972`, `1000`, `1024`, `1080`, `1125`, `1152`, `1200`,
4459	`1215`, `1250`, `1280`, `1296`, `1350`, `1440`, `1458`, `1500`, `1536`, `1600`, `1620`, `1728`, `1800`, `1875`,
4460	`1920`, `1944`, `2000`, `2025`, `2048`, `2160`, `2187`, `2250`, `2304`, `2400`, `2430`, `2500`, `2560`, `2592`,
4461	`2700`, `2880`, `2916`, `3000`, `3072`, `3125`, `3200`, `3240`, `3375`, `3456`, `3600`, `3645`, `3750`, `3840`,
4462	`3888`, `4000`, `4050`, `4096`, `4320`, `4374`, `4500`, `4608`, `4800`, `4860`, `5000`, `5120`, `5184`, `5400`,
4463	`5625`, `5760`, `5832`, `6000`, `6075`, `6144`, `6250`, `6400`, `6480`, `6561`, `6750`, `6912`, `7200`, `7290`,
4464	`7500`, `7680`, `7776`, `8000`, `8100`, `8192`, `8640`, `8748`, `9000`, `9216`, `9375`, `9600`, `9720`, `10000`,
4465	`10125`, `10240`, `10368`, `10800`, `10935`, `11250`, `11520`, `11664`, `12000`, `12150`, `12288`, `12500`,
4466	`12800`, `12960`, `13122`, `13500`, `13824`, `14400`, `14580`, `15000`, `15360`, `15552`, `15625`, `16000`,
4467	`16200`, `16384`, `16875`, `17280`, `17496`, `18000`, `18225`, `18432`, `18750`, `19200`, `19440`, `19683`,
4468	`20000`, `20250`, `20480`, `20736`, `21600`, `21870`, `22500`, `23040`, `23328`, `24000`, `24300`, `24576`,
4469	`25000`, `25600`, `25920`, `26244`, `27000`, `27648`, `28125`, `28800`, `29160`, `30000`, `30375`, `30720`,
4470	`31104`, `31250`, `32000`, `32400`, `32768`, `32805`, `33750`, `34560`, `34992`, `36000`, `36450`, `36864`,
4471	`37500`, `38400`, `38880`, `39366`, `40000`, `40500`, `40960`, `41472`, `43200`, `43740`, `45000`, `46080`,
4472	`46656`, `46875`, `48000`, `48600`, `49152`, `50000`, `50625`, `51200`, `51840`, `52488`, `54000`, `54675`,
4473	`55296`, `56250`, `57600`, `58320`, `59049`, `60000`, `60750`, `61440`, `62208`, `62500`, `64000`, `64800`,
4474	`65536`, `65610`, `67500`, `69120`, `69984`, `72000`, `72900`, `73728`, `75000`, `76800`, `77760`, `78125`,
4475	`78732`, `80000`, `81000`, `81920`, `82944`, `84375`, `86400`, `87480`, `90000`, `91125`, `92160`, `93312`,
4476	`93750`, `96000`, `97200`, `98304`, `98415`, `100000`, `101250`, `102400`, `103680`, `104976`, `108000`,
4477	`109350`, `110592`, `112500`, `115200`, `116640`, `118098`, `120000`, `121500`, `122880`, `124416`, `125000`,
4478	`128000`, `129600`, `131072`, `131220`, `135000`, `138240`, `139968`, `140625`, `144000`, `145800`, `147456`,
4479	`150000`, `151875`, `153600`, `155520`, `156250`, `157464`, `160000`, `162000`, `163840`, `164025`, `165888`,
4480	`168750`, `172800`, `174960`, `177147`, `180000`, `182250`, `184320`, `186624`, `187500`, `192000`, `194400`,
4481	`196608`, `196830`, `200000`, `202500`, `204800`, `207360`, `209952`, `216000`, `218700`, `221184`, `225000`,
4482	`230400`, `233280`, `234375`, `236196`, `240000`, `243000`, `245760`, `248832`, `250000`, `253125`, `256000`,
4483	`259200`, `262144`, `262440`, `270000`, `273375`, `276480`, `279936`, `281250`, `288000`, `291600`, `294912`,
4484	`295245`, `300000`, `303750`, `307200`, `311040`, `312500`, `314928`, `320000`, `324000`, `327680`, `328050`,
4485	`331776`, `337500`, `345600`, `349920`, `354294`, `360000`, `364500`, `368640`, `373248`, `375000`, `384000`,
4486	`388800`, `390625`, `393216`, `393660`, `400000`, `405000`, `409600`, `414720`, `419904`, `421875`, `432000`,
4487	`437400`, `442368`, `450000`, `455625`, `460800`, `466560`, `468750`, `472392`, `480000`, `486000`, `491520`,
4488	`492075`, `497664`, `500000`, `506250`, `512000`, `518400`, `524288`, `524880`, `531441`, `540000`, `546750`,
4489	`552960`, `559872`, `562500`, `576000`, `583200`, `589824`, `590490`, `600000`, `607500`, `614400`, `622080`,
4490	`625000`, `629856`, `640000`, `648000`, `655360`, `656100`, `663552`, `675000`, `691200`, `699840`, `703125`,
4491	`708588`, `720000`, `729000`, `737280`, `746496`, `750000`, `759375`, `768000`, `777600`, `781250`, `786432`,
4492	`787320`, `800000`, `810000`, `819200`, `820125`, `829440`, `839808`, `843750`, `864000`, `874800`, `884736`,
4493	`885735`, `900000`, `911250`, `921600`, `933120`, `937500`, `944784`, `960000`, `972000`, `983040`, `984150`,
4494	`995328`, `1000000`, `1012500`, `1024000`, `1036800`, `1048576`, `1049760`, `1062882`, `1080000`, `1093500`,
4495	`1105920`, `1119744`, `1125000`, `1152000`, `1166400`, `1171875`, `1179648`, `1180980`, `1200000`,
4496	`1215000`, `1228800`, `1244160`, `1250000`, `1259712`, `1265625`, `1280000`, `1296000`, `1310720`,
4497	`1312200`, `1327104`, `1350000`, `1366875`, `1382400`, `1399680`, `1406250`, `1417176`, `1440000`,
4498	`1458000`, `1474560`, `1476225`, `1492992`, `1500000`, `1518750`, `1536000`, `1555200`, `1562500`,
4499	`1572864`, `1574640`, `1594323`, `1600000`, `1620000`, `1638400`, `1640250`, `1658880`, `1679616`,
4500	`1687500`, `1728000`, `1749600`, `1769472`, `1771470`, `1800000`, `1822500`, `1843200`, `1866240`,
4501	`1875000`, `1889568`, `1920000`, `1944000`, `1953125`, `1966080`, `1968300`, `1990656`, `2000000`,
4502	`2025000`, `2048000`, `2073600`, `2097152`, `2099520`, `2109375`, `2125764`, `2160000`, `2187000`,
4503	`2211840`, `2239488`, `2250000`, `2278125`, `2304000`, `2332800`, `2343750`, `2359296`, `2361960`,
4504	`2400000`, `2430000`, `2457600`, `2460375`, `2488320`, `2500000`, `2519424`, `2531250`, `2560000`,
4505	`2592000`, `2621440`, `2624400`, `2654208`, `2657205`, `2700000`, `2733750`, `2764800`, `2799360`,
4506	`2812500`, `2834352`, `2880000`, `2916000`, `2949120`, `2952450`, `2985984`, `3000000`, `3037500`,
4507	`3072000`, `3110400`, `3125000`, `3145728`, `3149280`, `3188646`, `3200000`, `3240000`, `3276800`,
4508	`3280500`, `3317760`, `3359232`, `3375000`, `3456000`, `3499200`, `3515625`, `3538944`, `3542940`,
4509	`3600000`, `3645000`, `3686400`, `3732480`, `3750000`, `3779136`, `3796875`, `3840000`, `3888000`,
4510	`3906250`, `3932160`, `3936600`, `3981312`, `4000000`, `4050000`, `4096000`, `4100625`, `4147200`,
4511	`4194304`, `4199040`, `4218750`, `4251528`, `4320000`, `4374000`, `4423680`, `4428675`, `4478976`,
4512	`4500000`, `4556250`, `4608000`, `4665600`, `4687500`, `4718592`, `4723920`, `4782969`, `4800000`,
4513	`4860000`, `4915200`, `4920750`, `4976640`, `5000000`, `5038848`, `5062500`, `5120000`, `5184000`,
4514	`5242880`, `5248800`, `5308416`, `5314410`, `5400000`, `5467500`, `5529600`, `5598720`, `5625000`,
4515	`5668704`, `5760000`, `5832000`, `5859375`, `5898240`, `5904900`, `5971968`, `6000000`, `6075000`,
4516	`6144000`, `6220800`, `6250000`, `6291456`, `6298560`, `6328125`, `6377292`, `6400000`, `6480000`,
4517	`6553600`, `6561000`, `6635520`, `6718464`, `6750000`, `6834375`, `6912000`, `6998400`, `7031250`,
4518	`7077888`, `7085880`, `7200000`, `7290000`, `7372800`, `7381125`, `7464960`, `7500000`, `7558272`,
4519	`7593750`, `7680000`, `7776000`, `7812500`, `7864320`, `7873200`, `7962624`, `7971615`, `8000000`,
4520	`8100000`, `8192000`, `8201250`, `8294400`, `8388608`, `8398080`, `8437500`, `8503056`, `8640000`,
4521	`8748000`, `8847360`, `8857350`, `8957952`, `9000000`, `9112500`, `9216000`, `9331200`, `9375000`,
4522	`9437184`, `9447840`, `9565938`, `9600000`, `9720000`, `9765625`, `9830400`, `9841500`, `9953280`,
4523	`10000000`, `10077696`, `10125000`, `10240000`, `10368000`, `10485760`, `10497600`, `10546875`, `10616832`,
4524	`10628820`, `10800000`, `10935000`, `11059200`, `11197440`, `11250000`, `11337408`, `11390625`, `11520000`,
4525	`11664000`, `11718750`, `11796480`, `11809800`, `11943936`, `12000000`, `12150000`, `12288000`, `12301875`,
4526	`12441600`, `12500000`, `12582912`, `12597120`, `12656250`, `12754584`, `12800000`, `12960000`, `13107200`,
4527	`13122000`, `13271040`, `13286025`, `13436928`, `13500000`, `13668750`, `13824000`, `13996800`, `14062500`,
4528	`14155776`, `14171760`, `14400000`, `14580000`, `14745600`, `14762250`, `14929920`, `15000000`, `15116544`,
4529	`15187500`, `15360000`, `15552000`, `15625000`, `15728640`, `15746400`, `15925248`, `15943230`, `16000000`,
4530	`16200000`, `16384000`, `16402500`, `16588800`, `16777216`, `16796160`, `16875000`, `17006112`, `17280000`,
4531	`17496000`, `17578125`, `17694720`, `17714700`, `17915904`, `18000000`, `18225000`, `18432000`, `18662400`,
4532	`18750000`, `18874368`, `18895680`, `18984375`, `19131876`, `19200000`, `19440000`, `19531250`, `19660800`,
4533	`19683000`, `19906560`, `20000000`, `20155392`, `20250000`, `20480000`, `20503125`, `20736000`, `20971520`,
4534	`20995200`, `21093750`, `21233664`, `21257640`, `21600000`, `21870000`, `22118400`, `22143375`, `22394880`,
4535	`22500000`, `22674816`, `22781250`, `23040000`, `23328000`, `23437500`, `23592960`, `23619600`, `23887872`,
4536	`23914845`, `24000000`, `24300000`, `24576000`, `24603750`, `24883200`, `25000000`, `25165824`, `25194240`,
4537	`25312500`, `25509168`, `25600000`, `25920000`, `26214400`, `26244000`, `26542080`, `26572050`, `26873856`,
4538	`27000000`, `27337500`, `27648000`, `27993600`, `28125000`, `28311552`, `28343520`, `28800000`, `29160000`,
4539	`29296875`, `29491200`, `29524500`, `29859840`, `30000000`, `30233088`, `30375000`, `30720000`, `31104000`,
4540	`31250000`, `31457280`, `31492800`, `31640625`, `31850496`, `31886460`, `32000000`, `32400000`, `32768000`,
4541	`32805000`, `33177600`, `33554432`, `33592320`, `33750000`, `34012224`, `34171875`, `34560000`, `34992000`,
4542	`35156250`, `35389440`, `35429400`, `35831808`, `36000000`, `36450000`, `36864000`, `36905625`, `37324800`,
4543	`37500000`, `37748736`, `37791360`, `37968750`, `38263752`, `38400000`, `38880000`, `39062500`, `39321600`,
4544	`39366000`, `39813120`, `39858075`, `40000000`, `40310784`, `40500000`, `40960000`, `41006250`, `41472000`,
4545	`41943040`, `41990400`, `42187500`, `42467328`, `42515280`, `43200000`, `43740000`, `44236800`, `44286750`,
4546	`44789760`, `45000000`, `45349632`, `45562500`, `46080000`, `46656000`, `46875000`, `47185920`, `47239200`,
4547	`47775744`, `47829690`, `48000000`, `48600000`, `48828125`, `49152000`, `49207500`, `49766400`, `50000000`,
4548	`50331648`, `50388480`, `50625000`, `51018336`, `51200000`, `51840000`, `52428800`, `52488000`, `52734375`,
4549	`53084160`, `53144100`, `53747712`, `54000000`, `54675000`, `55296000`, `55987200`, `56250000`, `56623104`,
4550	`56687040`, `56953125`, `57600000`, `58320000`, `58593750`, `58982400`, `59049000`, `59719680`, `60000000`,
4551	`60466176`, `60750000`, `61440000`, `61509375`, `62208000`, `62500000`, `62914560`, `62985600`, `63281250`,
4552	`63700992`, `63772920`, `64000000`, `64800000`, `65536000`, `65610000`, `66355200`, `66430125`, `67108864`,
4553	`67184640`, `67500000`, `68024448`, `68343750`, `69120000`, `69984000`, `70312500`, `70778880`, `70858800`,
4554	`71663616`, `72000000`, `72900000`, `73728000`, `73811250`, `74649600`, `75000000`, `75497472`, `75582720`,
4555	`75937500`, `76527504`, `76800000`, `77760000`, `78125000`, `78643200`, `78732000`, `79626240`, `79716150`,
4556	`80000000`, `80621568`, `81000000`, `81920000`, `82012500`, `82944000`, `83886080`, `83980800`, `84375000`,
4557	`84934656`, `85030560`, `86400000`, `87480000`, `87890625`, `88473600`, `88573500`, `89579520`, `90000000`,
4558	`90699264`, `91125000`, `92160000`, `93312000`, `93750000`, `94371840`, `94478400`, `94921875`, `95551488`,
4559	`95659380`, `96000000`, `97200000`, `97656250`, `98304000`, `98415000`, `99532800`, `100000000`,
4560	`100663296`, `100776960`, `101250000`, `102036672`, `102400000`, `102515625`, `103680000`, `104857600`,
4561	`104976000`, `105468750`, `106168320`, `106288200`, `107495424`, `108000000`, `109350000`, `110592000`,
4562	`110716875`, `111974400`, `112500000`, `113246208`, `113374080`, `113906250`, `115200000`, `116640000`,
4563	`117187500`, `117964800`, `118098000`, `119439360`, `119574225`, `120000000`, `120932352`, `121500000`,
4564	`122880000`, `123018750`, `124416000`, `125000000`, `125829120`, `125971200`, `126562500`, `127401984`,
4565	`127545840`, `128000000`, `129600000`, `131072000`, `131220000`, `132710400`, `132860250`, `134217728`,
4566	`134369280`, `135000000`, `136048896`, `136687500`, `138240000`, `139968000`, `140625000`, `141557760`,
4567	`141717600`, `143327232`, `144000000`, `145800000`, `146484375`, `147456000`, `147622500`, `149299200`,
4568	`150000000`, `150994944`, `151165440`, `151875000`, `153055008`, `153600000`, `155520000`, `156250000`,
4569	`157286400`, `157464000`, `158203125`, `159252480`, `159432300`, `160000000`, `161243136`, `162000000`,
4570	`163840000`, `164025000`, `165888000`, `167772160`, `167961600`, `168750000`, `169869312`, `170061120`,
4571	`170859375`, `172800000`, `174960000`, `175781250`, `176947200`, `177147000`, `179159040`, `180000000`,
4572	`181398528`, `182250000`, `184320000`, `184528125`, `186624000`, `187500000`, `188743680`, `188956800`,
4573	`189843750`, `191102976`, `191318760`, `192000000`, `194400000`, `195312500`, `196608000`, `196830000`,
4574	`199065600`, `199290375`, `200000000`, `201326592`, `201553920`, `202500000`, `204073344`, `204800000`,
4575	`205031250`, `207360000`, `209715200`, `209952000`, `210937500`, `212336640`, `212576400`, `214990848`,
4576	`216000000`, `218700000`, `221184000`, `221433750`, `223948800`, `225000000`, `226492416`, `226748160`,
4577	`227812500`, `230400000`, `233280000`, `234375000`, `235929600`, `236196000`, `238878720`, `239148450`,
4578	`240000000`, `241864704`, `243000000`, `244140625`, `245760000`, `246037500`, `248832000`, `250000000`,
4579	`251658240`, `251942400`, `253125000`, `254803968`, `255091680`, `256000000`, `259200000`, `262144000`,
4580	`262440000`, `263671875`, `265420800`, `265720500`, `268435456`, `268738560`, `270000000`, `272097792`,
4581	`273375000`, `276480000`, `279936000`, `281250000`, `283115520`, `283435200`, `284765625`, `286654464`,
4582	`288000000`, `291600000`, `292968750`, `294912000`, `295245000`, `298598400`, `300000000`, `301989888`,
4583	`302330880`, `303750000`, `306110016`, `307200000`, `307546875`, `311040000`, `312500000`, `314572800`,
4584	`314928000`, `316406250`, `318504960`, `318864600`, `320000000`, `322486272`, `324000000`, `327680000`,
4585	`328050000`, `331776000`, `332150625`, `335544320`, `335923200`, `337500000`, `339738624`, `340122240`,
4586	`341718750`, `345600000`, `349920000`, `351562500`, `353894400`, `354294000`, `358318080`, `360000000`,
4587	`362797056`, `364500000`, `368640000`, `369056250`, `373248000`, `375000000`, `377487360`, `377913600`,
4588	`379687500`, `382205952`, `382637520`, `384000000`, `388800000`, `390625000`, `393216000`, `393660000`,
4589	`398131200`, `398580750`, `400000000`, `402653184`, `403107840`, `405000000`, `408146688`, `409600000`,
4590	`410062500`, `414720000`, `419430400`, `419904000`, `421875000`, `424673280`, `425152800`, `429981696`,
4591	`432000000`, `437400000`, `439453125`, `442368000`, `442867500`, `447897600`, `450000000`, `452984832`,
4592	`453496320`, `455625000`, `460800000`, `466560000`, `468750000`, `471859200`, `472392000`, `474609375`,
4593	`477757440`, `478296900`, `480000000`, `483729408`, `486000000`, `488281250`, `491520000`, `492075000`,
4594	`497664000`, `500000000`, `503316480`, `503884800`, `506250000`, `509607936`, `510183360`, `512000000`,
4595	`512578125`, `518400000`, `524288000`, `524880000`, `527343750`, `530841600`, `531441000`, `536870912`,
4596	`537477120`, `540000000`, `544195584`, `546750000`, `552960000`, `553584375`, `559872000`, `562500000`,
4597	`566231040`, `566870400`, `569531250`, `573308928`, `576000000`, `583200000`, `585937500`, `589824000`,
4598	`590490000`, `597196800`, `597871125`, `600000000`, `603979776`, `604661760`, `607500000`, `612220032`,
4599	`614400000`, `615093750`, `622080000`, `625000000`, `629145600`, `629856000`, `632812500`, `637009920`,
4600	`637729200`, `640000000`, `644972544`, `648000000`, `655360000`, `656100000`, `663552000`, `664301250`,
4601	`671088640`, `671846400`, `675000000`, `679477248`, `680244480`, `683437500`, `691200000`, `699840000`,
4602	`703125000`, `707788800`, `708588000`, `716636160`, `720000000`, `725594112`, `729000000`, `732421875`,
4603	`737280000`, `738112500`, `746496000`, `750000000`, `754974720`, `755827200`, `759375000`, `764411904`,
4604	`765275040`, `768000000`, `777600000`, `781250000`, `786432000`, `787320000`, `791015625`, `796262400`,
4605	`797161500`, `800000000`, `805306368`, `806215680`, `810000000`, `816293376`, `819200000`, `820125000`,
4606	`829440000`, `838860800`, `839808000`, `843750000`, `849346560`, `850305600`, `854296875`, `859963392`,
4607	`864000000`, `874800000`, `878906250`, `884736000`, `885735000`, `895795200`, `900000000`, `905969664`,
4608	`906992640`, `911250000`, `921600000`, `922640625`, `933120000`, `937500000`, `943718400`, `944784000`,
4609	`949218750`, `955514880`, `956593800`, `960000000`, `967458816`, `972000000`, `976562500`, `983040000`,
4610	`984150000`, `995328000`, `996451875`, `1000000000`, `1006632960`, `1007769600`, `1012500000`,
4611	`1019215872`, `1020366720`, `1024000000`, `1025156250`, `1036800000`, `1048576000`, `1049760000`,
4612	`1054687500`, `1061683200`, `1062882000`, `1073741824`, `1074954240`, `1080000000`, `1088391168`,
4613	`1093500000`, `1105920000`, `1107168750`, `1119744000`, `1125000000`, `1132462080`, `1133740800`,
4614	`1139062500`, `1146617856`, `1152000000`, `1166400000`, `1171875000`, `1179648000`, `1180980000`,
4615	`1194393600`, `1195742250`, `1200000000`, `1207959552`, `1209323520`, `1215000000`, `1220703125`,
4616	`1224440064`, `1228800000`, `1230187500`, `1244160000`, `1250000000`, `1258291200`, `1259712000`,
4617	`1265625000`, `1274019840`, `1275458400`, `1280000000`, `1289945088`, `1296000000`, `1310720000`,
4618	`1312200000`, `1318359375`, `1327104000`, `1328602500`, `1342177280`, `1343692800`, `1350000000`,
4619	`1358954496`, `1360488960`, `1366875000`, `1382400000`, `1399680000`, `1406250000`, `1415577600`,
4620	`1417176000`, `1423828125`, `1433272320`, `1440000000`, `1451188224`, `1458000000`, `1464843750`,
4621	`1474560000`, `1476225000`, `1492992000`, `1500000000`, `1509949440`, `1511654400`, `1518750000`,
4622	`1528823808`, `1530550080`, `1536000000`, `1537734375`, `1555200000`, `1562500000`, `1572864000`,
4623	`1574640000`, `1582031250`, `1592524800`, `1594323000`, `1600000000`, `1610612736`, `1612431360`,
4624	`1620000000`, `1632586752`, `1638400000`, `1640250000`, `1658880000`, `1660753125`, `1677721600`,
4625	`1679616000`, `1687500000`, `1698693120`, `1700611200`, `1708593750`, `1719926784`, `1728000000`,
4626	`1749600000`, `1757812500`, `1769472000`, `1771470000`, `1791590400`, `1800000000`, `1811939328`,
4627	`1813985280`, `1822500000`, `1843200000`, `1845281250`, `1866240000`, `1875000000`, `1887436800`,
4628	`1889568000`, `1898437500`, `1911029760`, `1913187600`, `1920000000`, `1934917632`, `1944000000`,
4629	`1953125000`, `1966080000`, `1968300000`, `1990656000`, `1992903750`, `2000000000`, `2013265920`,
4630	`2015539200`, `2025000000`, `2038431744`, `2040733440`, `2048000000`, `2050312500`, `2073600000`,
4631	`2097152000`, `2099520000`, `2109375000`, `2123366400`, `2125764000`
4632	};
4633
4634	}
4635
4636	int cv::getOptimalDFTSize( int size0 )
4637	{
4638	int a = `0`, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[`0`]) - `1`;
4639	if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] )
4640	return -`1`;
4641
4642	while( a < b )
4643	{
4644	int c = (a + b) >> `1`;
4645	if( size0 <= optimalDFTSizeTab[c] )
4646	b = c;
4647	else
4648	a = c+`1`;
4649	}
4650
4651	return optimalDFTSizeTab[b];
4652	}
4653
4654
4655	#ifndef OPENCV_EXCLUDE_C_API
4656
4657	CV_IMPL void
4658	cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
4659	{
4660	cv::Mat src = cv::cvarrToMat(arr: srcarr), dst0 = cv::cvarrToMat(arr: dstarr), dst = dst0;
4661	int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : `0`) \|
4662	((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : `0`) \|
4663	((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : `0`);
4664
4665	CV_Assert( src.size == dst.size );
4666
4667	if( src.type() != dst.type() )
4668	{
4669	if( dst.channels() == `2` )
4670	_flags \|= cv::DFT_COMPLEX_OUTPUT;
4671	else
4672	_flags \|= cv::DFT_REAL_OUTPUT;
4673	}
4674
4675	cv::dft( src0: src, dst: dst, flags: _flags, nonzero_rows );
4676	CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect
4677	}
4678
4679
4680	CV_IMPL void
4681	cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
4682	CvArr* dstarr, int flags )
4683	{
4684	cv::Mat srcA = cv::cvarrToMat(arr: srcAarr),
4685	srcB = cv::cvarrToMat(arr: srcBarr),
4686	dst = cv::cvarrToMat(arr: dstarr);
4687	CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() );
4688
4689	cv::mulSpectrums(srcA: srcA, srcB: srcB, dst: dst,
4690	flags: (flags & CV_DXT_ROWS) ? cv::DFT_ROWS : `0`,
4691	conjB: (flags & CV_DXT_MUL_CONJ) != `0` );
4692	}
4693
4694
4695	CV_IMPL void
4696	cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
4697	{
4698	cv::Mat src = cv::cvarrToMat(arr: srcarr), dst = cv::cvarrToMat(arr: dstarr);
4699	CV_Assert( src.size == dst.size && src.type() == dst.type() );
4700	int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : `0`) \|
4701	((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : `0`);
4702	cv::dct( src0: src, dst: dst, flags: _flags );
4703	}
4704
4705
4706	CV_IMPL int
4707	cvGetOptimalDFTSize( int size0 )
4708	{
4709	return cv::getOptimalDFTSize(size0);
4710	}
4711
4712	#endif // OPENCV_EXCLUDE_C_API
4713	/ End of file. /
4714

source code of opencv/modules/core/src/dxt.cpp