1/* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2026 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#define INCLUDE_STRING
21#define IN_TARGET_CODE 1
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "rtl.h"
28#include "tree.h"
29#include "memmodel.h"
30#include "gimple.h"
31#include "cfghooks.h"
32#include "cfgloop.h"
33#include "df.h"
34#include "tm_p.h"
35#include "stringpool.h"
36#include "expmed.h"
37#include "optabs.h"
38#include "regs.h"
39#include "emit-rtl.h"
40#include "recog.h"
41#include "cgraph.h"
42#include "diagnostic.h"
43#include "cfgbuild.h"
44#include "alias.h"
45#include "fold-const.h"
46#include "attribs.h"
47#include "calls.h"
48#include "stor-layout.h"
49#include "varasm.h"
50#include "output.h"
51#include "insn-attr.h"
52#include "flags.h"
53#include "except.h"
54#include "explow.h"
55#include "expr.h"
56#include "cfgrtl.h"
57#include "common/common-target.h"
58#include "langhooks.h"
59#include "reload.h"
60#include "gimplify.h"
61#include "dwarf2.h"
62#include "tm-constrs.h"
63#include "cselib.h"
64#include "sched-int.h"
65#include "opts.h"
66#include "tree-pass.h"
67#include "context.h"
68#include "pass_manager.h"
69#include "target-globals.h"
70#include "gimple-iterator.h"
71#include "gimple-fold.h"
72#include "tree-vectorizer.h"
73#include "shrink-wrap.h"
74#include "builtins.h"
75#include "rtl-iter.h"
76#include "tree-iterator.h"
77#include "dbgcnt.h"
78#include "case-cfn-macros.h"
79#include "dojump.h"
80#include "fold-const-call.h"
81#include "tree-vrp.h"
82#include "tree-ssanames.h"
83#include "selftest.h"
84#include "selftest-rtl.h"
85#include "print-rtl.h"
86#include "intl.h"
87#include "ifcvt.h"
88#include "symbol-summary.h"
89#include "sreal.h"
90#include "ipa-cp.h"
91#include "ipa-prop.h"
92#include "ipa-fnsummary.h"
93#include "wide-int-bitmask.h"
94#include "tree-vector-builder.h"
95#include "debug.h"
96#include "dwarf2out.h"
97#include "i386-options.h"
98#include "i386-builtins.h"
99#include "i386-expand.h"
100#include "i386-features.h"
101#include "function-abi.h"
102#include "rtl-error.h"
103#include "gimple-pretty-print.h"
104
105/* This file should be included last. */
106#include "target-def.h"
107
/* Forward declarations for static helpers defined later in this file.  */
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx, bool = false);


/* Fallback when the target headers do not provide a stack limit.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   Index 4 is used for any mode wider than DImode.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)


/* Cost table selected by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Cost table selected by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
140
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  The entries must follow
   the hard register numbering used by this backend.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers: st(0), st(1) get their own classes.  */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  k0 is not allocatable as a general mask.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  /* REX2 registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
181
/* The "default" register map used in 32bit mode.  Maps each GCC hard
   register number to the debugger register number, or to
   IGNORED_DWARF_REGNUM / INVALID_REGNUM where no mapping exists.  */

unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers: unavailable in 32-bit mode.  */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
212
/* The "default" register map used in 64bit mode.  Maps each GCC hard
   register number to the debugger register number.  */

unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
  130, 131, 132, 133, 134, 135, 136, 137,
  138, 139, 140, 141, 142, 143, 144, 145
};
242
243/* Define the register numbers to be used in Dwarf debugging information.
244 The SVR4 reference port C compiler uses the following register numbers
245 in its Dwarf output code:
246 0 for %eax (gcc regno = 0)
247 1 for %ecx (gcc regno = 2)
248 2 for %edx (gcc regno = 1)
249 3 for %ebx (gcc regno = 3)
250 4 for %esp (gcc regno = 7)
251 5 for %ebp (gcc regno = 6)
252 6 for %esi (gcc regno = 4)
253 7 for %edi (gcc regno = 5)
254 The following three DWARF register numbers are never generated by
255 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
256 believed these numbers have these meanings.
257 8 for %eip (no gcc equivalent)
258 9 for %eflags (gcc regno = 17)
259 10 for %trapno (no gcc equivalent)
260 It is not at all clear how we should number the FP stack registers
261 for the x86 architecture. If the version of SDB on x86/svr4 were
262 a bit less brain dead with respect to floating-point then we would
263 have a precedent to follow with respect to DWARF register numbers
264 for x86 FP registers, but the SDB on x86/svr4 was so completely
265 broken with respect to FP registers that it is hardly worth thinking
266 of it as something to strive for compatibility with.
267 The version of x86/svr4 SDB I had does (partially)
268 seem to believe that DWARF register number 11 is associated with
269 the x86 register %st(0), but that's about all. Higher DWARF
270 register numbers don't seem to be associated with anything in
271 particular, and even for DWARF regno 11, SDB only seemed to under-
272 stand that it should say that a variable lives in %st(0) (when
273 asked via an `=' command) if we said it was in DWARF regno 11,
274 but SDB still printed garbage when asked for the value of the
275 variable in question (via a `/' command).
276 (Also note that the labels SDB printed for various FP stack regs
277 when doing an `x' command were all wrong.)
278 Note that these problems generally don't affect the native SVR4
279 C compiler because it doesn't allow the use of -O with -g and
280 because when it is *not* optimizing, it allocates a memory
281 location for each floating-point variable, and the memory
282 location is what gets described in the DWARF AT_location
283 attribute for the variable in question.
284 Regardless of the severe mental illness of the x86/svr4 SDB, we
285 do something sensible here and we use the following DWARF
286 register numbers. Note that these are all stack-top-relative
287 numbers.
288 11 for %st(0) (gcc regno = 8)
289 12 for %st(1) (gcc regno = 9)
290 13 for %st(2) (gcc regno = 10)
291 14 for %st(3) (gcc regno = 11)
292 15 for %st(4) (gcc regno = 12)
293 16 for %st(5) (gcc regno = 13)
294 17 for %st(6) (gcc regno = 14)
295 18 for %st(7) (gcc regno = 15)
296*/
/* SVR4/DWARF register numbering for 32-bit mode; see the long comment
   above for the rationale behind each entry.  */
unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame: only %eflags (DWARF 9) is mapped.  */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers: unavailable in 32-bit mode.  */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
325
/* Define parameter passing and return registers.  */

/* Integer argument registers for the System V x86-64 ABI, in argument
   order.  */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

/* Integer argument registers for the Microsoft x64 ABI, in argument
   order.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

/* Similar as Clang's preserve_none function parameter passing.
   NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p.  */

static int const x86_64_preserve_none_int_parameter_registers[6] =
{
  R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
};

/* Integer return-value registers.  */
static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
350
/* Define the structure for the machine field in struct function.  */

/* One pre-allocated stack slot; the list hangs off the per-function
   machine state.  */
struct GTY(()) stack_local_entry {
  /* Machine mode of the slot (narrowed to a short).  */
  unsigned short mode;
  /* Slot index; presumably distinguishes multiple slots of the same
     mode — confirm against the allocator that fills this list.  */
  unsigned short n;
  /* RTL referring to the stack slot.  */
  rtx rtl;
  /* Next slot in the singly-linked list.  */
  struct stack_local_entry *next;
};
359
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char ix86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* True if there is no direct access to extern symbols.  */
bool ix86_has_no_direct_extern_access;

/* Calling-ABI specific va_list type nodes.  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
398
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSEHF_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Upper bound on the number of x86_64_reg_class entries used to
   classify a single argument.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* True once ext_80387_constants_table has been filled in.  */
static bool ext_80387_constants_init;
427
428
/* Forward declarations for static functions defined later in this file.  */
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);
442
/* x86 condition codes in instruction-encoding order (O, NO, B, NB,
   E, NE, BE, NBE, S, NS, P, NP, L, NL, LE, NLE).  */
typedef enum ix86_flags_cc
{
  X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
  X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
  X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
  X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
} ix86_cc;

/* Default-flag-value ("dfv") suffix strings, indexed by ix86_cc, used
   when printing APX CCMP/CTEST style instructions.  */
static const char *ix86_ccmp_dfv_mapping[] =
{
  "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
  "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
};


/* Whether -mtune= or -march= were specified.  */
int ix86_tune_defaulted;
int ix86_arch_specified;
463
464/* Return true if a red-zone is in use. We can't use red-zone when
465 there are local indirect jumps, like "indirect_jump" or "tablejump",
466 which jumps to another place in the function, since "call" in the
467 indirect thunk pushes the return address onto stack, destroying
468 red-zone.
469
470 NB: Don't use red-zone for functions with no_caller_saved_registers
471 and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
472 for 31 GPRs or 15 GPRs + 16 XMM registers.
473
474 TODO: If we can reserve the first 2 WORDs, for PUSH and, another
475 for CALL, in red-zone, we can allow local indirect jumps with
476 indirect thunk. */
477
478bool
479ix86_using_red_zone (void)
480{
481 return (TARGET_RED_ZONE
482 && !TARGET_64BIT_MS_ABI
483 && ((!TARGET_APX_EGPR && !TARGET_SSE)
484 || (cfun->machine->call_saved_registers
485 != TYPE_NO_CALLER_SAVED_REGISTERS))
486 && (!cfun->machine->has_local_indirect_jump
487 || cfun->machine->indirect_branch_type == indirect_branch_keep));
488}
489
490/* Return true, if profiling code should be emitted before
491 prologue. Otherwise it returns false.
492 Note: For x86 with "hotfix" it is sorried. */
493static bool
494ix86_profile_before_prologue (void)
495{
496 return flag_fentry != 0;
497}
498
/* Update register usage after having seen the compiler flags.  Clears
   registers from ACCESSIBLE_REG_SET that the selected ISA cannot touch
   and recomputes CLOBBERED_REGS and call_used_regs accordingly.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers
     except fixed_regs and registers used for the function return value,
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun
      && (cfun->machine->call_saved_registers
	  == TYPE_NO_CALLER_SAVED_REGISTERS))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  Values > 1 encode a mask of
	 ABIs in which the register is call-used.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], bit: i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], bit: i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the upper SSE registers and all
     mask registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }

  /* If APX is disabled (or we are not in 64-bit mode), disable the
     REX2 extended GPRs.  */
  if (! (TARGET_APX_EGPR && TARGET_64BIT))
    {
      for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
    }
}
575
/* Canonicalize a comparison from one we don't have to one we do have.
   *CODE is the rtx comparison code, *OP0/*OP1 are the operands; all
   three may be rewritten in place.  When OP0_PRESERVE_VALUE is true
   *OP0 must be left untouched, so all transforms below bail out.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (a&: *op0, b&: *op1);
	  *code = (int) scode;
	  return;
	}
    }

  /* SUB (a, b) underflows precisely when a < b.  Convert
     (compare (minus (a b)) a) to (compare (a b))
     to match *sub<mode>_3 pattern.  */
  if (!op0_preserve_value
      && (*code == GTU || *code == LEU)
      && GET_CODE (*op0) == MINUS
      && rtx_equal_p (XEXP (*op0, 0), *op1))
    {
      *op1 = XEXP (*op0, 1);
      *op0 = XEXP (*op0, 0);
      /* GTU <-> LEU become LTU <-> GEU on the swapped operands.  */
      *code = (int) swap_condition ((enum rtx_code) *code);
      return;
    }

  /* Swap operands of GTU comparison to canonicalize
     addcarry/subborrow comparison.  */
  if (!op0_preserve_value
      && *code == GTU
      && GET_CODE (*op0) == PLUS
      && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
      && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
      && GET_CODE (*op1) == ZERO_EXTEND)
    {
      std::swap (a&: *op0, b&: *op1);
      *code = (int) swap_condition ((enum rtx_code) *code);
      return;
    }
}
629
/* Hook to determine if one function can safely inline another.
   CALLER/CALLEE are FUNCTION_DECLs; returns true when CALLEE's target
   options are compatible with (a subset of) CALLER's.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines.  Let's
     hope the user knows what he is doing.  */
  unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);


  /* A missing target-option node means the default options apply.  */
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute (attr_name: "always_inline",
			    DECL_ATTRIBUTES (callee)));

  /* If callee only uses GPRs, ignore MASK_80387.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    always_inline_safe_mask |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (decl: callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (node: callee_node) == NULL
	       || ipa_fn_summaries->get (node: callee_node)->fp_expressions))
    ret = false;

  /* At this point we cannot identify whether arch or tune setting
     comes from target attribute or not.  So the most conservative way
     is to allow the callee that uses default arch and tune string to
     be inlined.  */
  else if (!strcmp (s1: callee_opts->x_ix86_arch_string, s2: "x86-64")
	   && !strcmp (s1: callee_opts->x_ix86_tune_string, s2: "generic"))
    ret = true;

  /* See if arch, tune, etc. are the same.  As previous ISA flags already
     checks if callee's ISA is subset of caller's, do not block
     always_inline attribute for callee even it has different arch.  */
  else if (!always_inline && caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
722
723/* Return true if this goes in large data/bss. */
724
725static bool
726ix86_in_large_data_p (tree exp)
727{
728 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
729 && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
730 return false;
731
732 if (exp == NULL_TREE)
733 return false;
734
735 /* Functions are never large data. */
736 if (TREE_CODE (exp) == FUNCTION_DECL)
737 return false;
738
739 /* Automatic variables are never large data. */
740 if (VAR_P (exp) && !is_global_var (t: exp))
741 return false;
742
743 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
744 {
745 const char *section = DECL_SECTION_NAME (exp);
746 if (strcmp (s1: section, s2: ".ldata") == 0
747 || strcmp (s1: section, s2: ".lbss") == 0)
748 return true;
749 return false;
750 }
751 else
752 {
753 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
754
755 /* If this is an incomplete type with size 0, then we can't put it
756 in data because it might be too big when completed. Also,
757 int_size_in_bytes returns -1 if size can vary or is larger than
758 an integer in which case also it is safer to assume that it goes in
759 large data. */
760 if (size <= 0 || size > ix86_section_threshold)
761 return true;
762 }
763
764 return false;
765}
766
/* i386-specific section flag to mark large sections.  */
#define SECTION_LARGE SECTION_MACH_DEP

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  Large-model data gets the .ldata/.lbss
   family of sections; everything else falls back to the generic ELF
   selection.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (exp: decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags &= ~SECTION_WRITE;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories cannot occur on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
834
835/* Select a set of attributes for section NAME based on the properties
836 of DECL and whether or not RELOC indicates that DECL's initializer
837 might contain runtime relocations. */
838
839static unsigned int ATTRIBUTE_UNUSED
840x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
841{
842 unsigned int flags = default_section_type_flags (decl, name, reloc);
843
844 if (ix86_in_large_data_p (exp: decl))
845 flags |= SECTION_LARGE;
846
847 if (decl == NULL_TREE
848 && (strcmp (s1: name, s2: ".ldata.rel.ro") == 0
849 || strcmp (s1: name, s2: ".ldata.rel.ro.local") == 0))
850 flags |= SECTION_RELRO;
851
852 if (strcmp (s1: name, s2: ".lbss") == 0
853 || startswith (str: name, prefix: ".lbss.")
854 || startswith (str: name, prefix: ".gnu.linkonce.lb."))
855 flags |= SECTION_BSS;
856
857 return flags;
858}
859
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  For large-model data the unique name is
   based on the .ldata/.lbss/.lrodata prefixes (or their .gnu.linkonce
   one-only variants); otherwise the generic handler is used.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (exp: decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories cannot occur on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
923
924/* Return true if TYPE has no_callee_saved_registers or preserve_none
925 attribute. */
926
927bool
928ix86_type_no_callee_saved_registers_p (const_tree type)
929{
930 return (lookup_attribute (attr_name: "no_callee_saved_registers",
931 TYPE_ATTRIBUTES (type)) != NULL
932 || lookup_attribute (attr_name: "preserve_none",
933 TYPE_ATTRIBUTES (type)) != NULL);
934}
935
936#ifdef COMMON_ASM_OP
937
938#ifndef LARGECOMM_SECTION_ASM_OP
939#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
940#endif
941
942/* This says how to output assembler code to declare an
943 uninitialized external linkage data object.
944
945 For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
946 large objects. */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			     const char *name, unsigned HOST_WIDE_INT size,
			     unsigned align)
{
  /* Under the medium/large code models, objects above the section
     threshold go into the large .lbss section via .largecomm.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  /* ALIGN is in bits; the assembler directive wants bytes.  */
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
965#endif
966
967/* Utility function for targets to use in implementing
968 ASM_OUTPUT_ALIGNED_BSS. */
969
void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, unsigned align)
{
  /* Large objects use .lbss for the medium/large code models; everything
     else goes to the normal .bss section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve at least one byte so the label gets a distinct address.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
990
991/* Decide whether we must probe the stack before any space allocation
992 on this target. It's essentially TARGET_STACK_PROBE except when
993 -fstack-check causes the stack to be already probed differently. */
994
995bool
996ix86_target_stack_probe (void)
997{
998 /* Do not probe the stack twice if static stack checking is enabled. */
999 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
1000 return false;
1001
1002 return TARGET_STACK_PROBE;
1003}
1004
1005/* Decide whether we can make a sibling call to a function. DECL is the
1006 declaration of the function being targeted by the call and EXP is the
1007 CALL_EXPR representing the call. */
1008
static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  bool bind_global = decl && !targetm.binds_local_p (decl);

  /* Naked functions manage their own prologue/epilogue; leave their
     call sequences alone.  */
  if (ix86_function_naked (current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */
      decl_or_type = type;
    }

  /* Sibling call isn't OK if callee has no callee-saved registers
     and the calling function has callee-saved registers.  */
  if ((cfun->machine->call_saved_registers
       != TYPE_NO_CALLEE_SAVED_REGISTERS)
      && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
      && ix86_type_no_callee_saved_registers_p (type))
    return false;

  /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
  if ((OUTGOING_REG_PARM_STACK_SPACE (type)
       != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
      || (REG_PARM_STACK_SPACE (decl_or_type)
	  != REG_PARM_STACK_SPACE (current_function_decl)))
    {
      maybe_complain_about_tail_call (exp,
				      "inconsistent size of stack space"
				      " allocated for arguments which are"
				      " passed in registers");
      return false;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions and call to global
	 function via GOT slot are indirect.  */
      if (!decl
	  || (bind_global && flag_pic && !flag_plt)
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
	  || flag_force_indirect_call)
	{
	  /* Check if regparm >= 3 since arg_reg_available is set to
	     false if regparm == 0.  If regparm is 1 or 2, there is
	     always a call-clobbered register available.

	     ??? The symbol indirect call doesn't need a call-clobbered
	     register.  But we don't know if this is a symbol indirect
	     call or not here.  */
	  if (ix86_function_regparm (type, decl) >= 3
	      && !cfun->machine->arg_reg_available)
	    return false;
	}
    }

  if (decl && ix86_use_pseudo_pic_reg ())
    {
      /* When PIC register is used, it must be restored after ifunc
	 function returns.  */
      cgraph_node *node = cgraph_node::get (decl);
      if (node && node->ifunc_resolver)
	return false;
    }

  /* Disable sibcall if callee has indirect_return attribute and
     caller doesn't since callee will return to the caller's caller
     via an indirect jump.  */
  if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
       == (CF_RETURN | CF_BRANCH))
      && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
      && !lookup_attribute ("indirect_return",
			    TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1153
1154/* This function determines from TYPE the calling-convention. */
1155
unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  /* The 64-bit ABIs have a single, fixed calling convention.  */
  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      /* At most one base convention attribute can apply.  */
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      /* An explicit base convention attribute settles the question.  */
      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  /* -mrtd makes stdcall the default for non-variadic functions.  */
  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  /* Non-variadic MS-ABI methods default to thiscall; everything else
     defaults to cdecl.  */
  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
1203
1204/* Return 0 if the attributes for two types are incompatible, 1 if they
1205 are compatible, and 2 if they are nearly compatible (which causes a
1206 warning to be generated). */
1207
static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  /* Only function and method types carry calling-convention
     attributes that need comparing.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  if (ix86_type_no_callee_saved_registers_p (type1)
      != ix86_type_no_callee_saved_registers_p (type2))
    return 0;

  /* The preserve_none attribute implies a different calling
     convention, but only for 64-bit.  */
  if (TARGET_64BIT
      && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
	  != lookup_attribute ("preserve_none",
			       TYPE_ATTRIBUTES (type2))))
    return 0;

  return 1;
}
1239
1240/* Return the regparm value for a function with the indicated TYPE and DECL.
1241 DECL may be NULL when calling function indirectly
1242 or considering a libcall. */
1243
static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  /* The 64-bit register-parameter counts are fixed by the ABI.  */
  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  /* An explicit regparm attribute overrides the -mregparm default.  */
  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
	target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
	 checking here just optimize means that with
	 __attribute__((optimize (...))) caller could use regparm convention
	 and callee not, or vice versa.  Instead look at whether the callee
	 is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
	  && !(profile_flag && !flag_fentry))
	{
	  if (target->local && target->can_change_signature)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a
		 fixed register variable.  */
	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
		   local_regparm++)
		if (fixed_regs[local_regparm])
		  break;

	      /* We don't want to use regparm(3) for nested functions as
		 these use a static chain pointer in the third argument.  */
	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
		local_regparm = 2;

	      /* Save a register for the split stack.  */
	      if (flag_split_stack)
		{
		  if (local_regparm == 3)
		    local_regparm = 2;
		  else if (local_regparm == 2
			   && DECL_STATIC_CHAIN (target->decl))
		    local_regparm = 1;
		}

	      /* Each fixed register usage increases register pressure,
		 so less registers should be used for argument passing.
		 This functionality can be overridden by an explicit
		 regparm value.  */
	      for (regno = AX_REG; regno <= DI_REG; regno++)
		if (fixed_regs[regno])
		  globals++;

	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }

  return regparm;
}
1332
1333/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1334 DFmode (2) arguments in SSE registers for a function with the
1335 indicated TYPE and DECL. DECL may be NULL when calling function
1336 indirectly or considering a libcall. Return -1 if any FP parameter
   should be rejected by error.  Return -1 in situations where we imply the
   SSE calling convention but the function is called from another function with
1339 SSE disabled. Otherwise return 0. */
1340
static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  /* This convention only exists for 32-bit code.  */
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* Without a decl we cannot prove the callee is local, so no implicit
     SSE convention applies.  */
  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
	{
	  /* Refuse to produce wrong code when local function with SSE enabled
	     is called from SSE disabled function.
	     FIXME: We need a way to detect these cases cross-ltrans partition
	     and avoid using SSE calling conventions on local functions called
	     from function with SSE disabled.  For now at least delay the
	     warning until we know we are going to produce wrong code.
	     See PR66047  */
	  if (!TARGET_SSE && warn)
	    return -1;
	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
				->x_ix86_isa_flags) ? 2 : 1;
	}
    }

  return 0;
}
1401
1402/* Return true if EAX is live at the start of the function. Used by
1403 ix86_expand_prologue to determine if we need special help before
1404 calling allocate_stack_worker. */
1405
static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  /* Hard register 0 is %eax/%rax on this target.  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}
1417
/* Return true if a function with type FNTYPE leaves the hidden
   aggregate-return pointer argument for the caller to pop, i.e. the
   callee does not pop it on return.  */

static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      /* An explicit callee_pop_aggregate_return(0) attribute means
	 keep the pointer; a non-zero value means the callee pops it.  */
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
1437
1438/* Value is the number of bytes of arguments automatically
1439 popped when returning from a subroutine call.
1440 FUNDECL is the declaration node of the function (as a tree),
1441 FUNTYPE is the data type of the function (as a tree),
1442 or for a library call it is an identifier node for the subroutine name.
1443 SIZE is the number of bytes of arguments passed on the stack.
1444
1445 On the 80386, the RTD insn may be used to pop them if the number
1446 of args is fixed, but if the number is variable then the caller
1447 must pop them all. RTD can't be used for library calls now
1448 because the library is compiled with the Unix compiler.
1449 Use of RTD is a selectable option, since it is incompatible with
1450 standard Unix calling sequences. If the option is not selected,
1451 the caller must always pop the args.
1452
1453 The attribute stdcall is equivalent to RTD on a per module basis. */
1454
static poly_int64
ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  /* stdcall, fastcall and thiscall callees pop their own fixed
     arguments, but only for non-variadic functions: a variadic callee
     cannot know how many bytes the caller pushed.  */
  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      /* With regparm == 0 the hidden pointer is on the stack and the
	 callee pops one word.  */
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
1482
1483/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1484
static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
	 is irrelevant for matching constraints.  */
      if (UNARY_P (op))
	op = XEXP (op, 0);

      /* Look through a subreg of a hard register, remembering the
	 register-number offset the subreg implies.  */
      if (SUBREG_P (op))
	{
	  if (REG_P (SUBREG_REG (op))
	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					  GET_MODE (SUBREG_REG (op)),
					  SUBREG_BYTE (op),
					  GET_MODE (op));
	  op = SUBREG_REG (op);
	}

      /* Only hard-register operands need this constraint check.  */
      if (!(REG_P (op) && HARD_REGISTER_P (op)))
	continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      /* The operand is acceptable if any preferred alternative allows
	 it: either the alternative accepts anything, the operand
	 matches another operand, or the register fits the constraint
	 class.  */
      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	{
	  if (!TEST_BIT (preferred, j))
	    continue;
	  if (op_alt[i].anything_ok
	      || (op_alt[i].matches != -1
		  && operands_match_p
		      (recog_data.operand[i],
		       recog_data.operand[op_alt[i].matches]))
	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
	    {
	      win = true;
	      break;
	    }
	}

      if (!win)
	return false;
    }

  return true;
}
1559
1560/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1561
static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  /* The ASan shadow-memory offset is chosen per subtarget.  */
  return SUBTARGET_SHADOW_OFFSET;
}
1567
1568/* Argument support functions. */
1569
1570/* Return true when register may be used to pass function parameters. */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  /* SSE argument registers apply in both 32- and 64-bit modes.  */
  if (TARGET_SSE && SSE_REGNO_P (regno)
      && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    return true;

  /* In 32-bit mode the first REGPARM_MAX integer registers may carry
     arguments, plus MMX registers when MMX is enabled.  */
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_MMX && MMX_REGNO_P (regno)
		&& regno < FIRST_MMX_REG + MMX_REGPARM_MAX));

  /* TODO: The function should depend on current function ABI but
     builtins.cc would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  /* Choose the integer parameter register set for the active ABI.  */
  if (cfun
      && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = 0; i < (call_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
1610
1611/* Return if we do not know how to pass ARG solely in registers. */
1612
1613static bool
1614ix86_must_pass_in_stack (const function_arg_info &arg)
1615{
1616 if (must_pass_in_stack_var_size_or_pad (arg))
1617 return true;
1618
1619 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1620 The layout_type routine is crafty and tries to trick us into passing
1621 currently unsupported vector types on the stack by using TImode. */
1622 return (!TARGET_64BIT && arg.mode == TImode
1623 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1624}
1625
1626/* It returns the size, in bytes, of the area reserved for arguments passed
1627 in registers for the function represented by fndecl dependent to the used
1628 abi format. */
1629int
1630ix86_reg_parm_stack_space (const_tree fndecl)
1631{
1632 enum calling_abi call_abi = SYSV_ABI;
1633 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1634 call_abi = ix86_function_abi (fndecl);
1635 else
1636 call_abi = ix86_function_type_abi (fndecl);
1637 if (TARGET_64BIT && call_abi == MS_ABI)
1638 return 32;
1639 return 0;
1640}
1641
1642/* We add this as a workaround in order to use libc_has_function
1643 hook in i386.md. */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  /* Forward to the target hook with no specific function decl.  */
  return targetm.libc_has_function (fn_class, NULL_TREE);
}
1649
1650/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1651 specifying the call abi used. */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  /* Without attributes the default ABI applies.  */
  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  /* The ms_abi / sysv_abi attributes override the default ABI.  */
  if (abi == SYSV_ABI
      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      /* Diagnose ms_abi on X32, but only once per compilation.  */
      static int warned;
      if (TARGET_X32 && !warned)
	{
	  error ("X32 does not support %<ms_abi%> attribute");
	  warned = 1;
	}

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}
1678
/* Return the calling ABI (SYSV_ABI or MS_ABI) of FNDECL, or the
   default ABI when FNDECL is NULL.  */
enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
}
1684
1685/* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1686 specifying the call abi used. */
enum calling_abi
ix86_cfun_abi (void)
{
  /* Outside of a function context, fall back to the default ABI.  */
  return cfun ? cfun->machine->call_abi : ix86_abi;
}
1692
/* Return true if FN carries the ms_hook_prologue attribute.  The
   attribute is diagnosed and ignored on nested functions.  */
bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "%<ms_hook_prologue%> attribute is not compatible "
		  "with nested function");
      else
	return true;
    }
  return false;
}
1707
1708bool
1709ix86_function_naked (const_tree fn)
1710{
1711 if (fn && lookup_attribute (attr_name: "naked", DECL_ATTRIBUTES (fn)))
1712 return true;
1713
1714 return false;
1715}
1716
1717/* Write the extra assembler code needed to declare a function properly. */
1718
void
ix86_asm_output_function_label (FILE *out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (cfun)
    cfun->machine->function_label_emitted = true;

  /* For ms_hook_prologue functions, emit 32 (64-bit) or 16 (32-bit)
     filler bytes of 0xcc before the label to create a hot-patch area.  */
  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (out_file);
#endif

  assemble_function_label_raw (out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
		 out_file);
	}
      else
	{
	  /* movl.s %edi, %edi
	     push %ebp
	     movl.s %esp, %ebp  */
	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
	}
    }
}
1761
1762/* Output a user-defined label. In AT&T syntax, registers are prefixed
1763 with %, so labels require no punctuation. In Intel syntax, registers
1764 are unprefixed, so labels may clash with registers or other operators,
1765 and require quoting. */
1766void
1767ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
1768{
1769 if (ASSEMBLER_DIALECT == ASM_ATT)
1770 fprintf (stream: file, format: "%s%s", prefix, label);
1771 else
1772 fprintf (stream: file, format: "\"%s%s\"", prefix, label);
1773}
1774
1775/* Implementation of call abi switching target hook. Specific to FNDECL
1776 the specific call register sets are set. See also
1777 ix86_conditional_register_usage for more details. */
void
ix86_call_abi_override (const_tree fndecl)
{
  /* Record FNDECL's ABI so the register sets can be adjusted.  */
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
1783
1784/* Return 1 if pseudo register should be created and used to hold
1785 GOT address for PIC code. */
1786bool
1787ix86_use_pseudo_pic_reg (void)
1788{
1789 if ((TARGET_64BIT
1790 && (ix86_cmodel == CM_SMALL_PIC
1791 || TARGET_PECOFF))
1792 || !flag_pic)
1793 return false;
1794 return true;
1795}
1796
1797/* Initialize large model PIC register. */
1798
static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  /* The large-model GOT setup only exists for 64-bit code.  */
  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  /* The scratch register must not clobber the PIC register itself.  */
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  /* Load the label address, then add the label-relative GOT offset,
     leaving the GOT base in pic_offset_table_rtx.  */
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
				label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* Convert the label to a deleted-label note, keeping its name
     available for debug output.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
1820
1821/* Create and initialize PIC register if required. */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
	ix86_init_large_pic_reg (R11_REG);
      else
	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /* If there is future mcount call in the function it is more profitable
	 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
		: pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
	emit_move_insn (pic_offset_table_rtx, reg);
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = end_sequence ();

  /* Insert the initialization sequence on the single edge out of the
     entry block so it executes once, before any use of the register.  */
  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}
1860
1861/* Initialize a variable CUM of type CUMULATIVE_ARGS
1862 for a call to a function whose data type is FNTYPE.
1863 For a library call, FNTYPE is 0. */
1864
1865void
1866init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1867 tree fntype, /* tree ptr for function decl */
1868 rtx libname, /* SYMBOL_REF of library name or 0 */
1869 tree fndecl,
1870 int caller)
1871{
1872 struct cgraph_node *local_info_node = NULL;
1873 struct cgraph_node *target = NULL;
1874
1875 /* Set silent_p to false to raise an error for invalid calls when
1876 expanding function body. */
1877 cfun->machine->silent_p = false;
1878
1879 memset (s: cum, c: 0, n: sizeof (*cum));
1880
1881 tree preserve_none_type;
1882 if (fndecl)
1883 {
1884 target = cgraph_node::get (decl: fndecl);
1885 if (target)
1886 {
1887 target = target->function_symbol ();
1888 local_info_node = cgraph_node::local_info_node (decl: target->decl);
1889 cum->call_abi = ix86_function_abi (fndecl: target->decl);
1890 preserve_none_type = TREE_TYPE (target->decl);
1891 }
1892 else
1893 {
1894 cum->call_abi = ix86_function_abi (fndecl);
1895 preserve_none_type = TREE_TYPE (fndecl);
1896 }
1897 }
1898 else
1899 {
1900 cum->call_abi = ix86_function_type_abi (fntype);
1901 preserve_none_type = fntype;
1902 }
1903 cum->preserve_none_abi
1904 = (preserve_none_type
1905 && (lookup_attribute (attr_name: "preserve_none",
1906 TYPE_ATTRIBUTES (preserve_none_type))
1907 != nullptr));
1908
1909 cum->caller = caller;
1910
1911 /* Set up the number of registers to use for passing arguments. */
1912 cum->nregs = ix86_regparm;
1913 if (TARGET_64BIT)
1914 {
1915 cum->nregs = (cum->call_abi == SYSV_ABI
1916 ? X86_64_REGPARM_MAX
1917 : X86_64_MS_REGPARM_MAX);
1918 }
1919 if (TARGET_SSE)
1920 {
1921 cum->sse_nregs = SSE_REGPARM_MAX;
1922 if (TARGET_64BIT)
1923 {
1924 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1925 ? X86_64_SSE_REGPARM_MAX
1926 : X86_64_MS_SSE_REGPARM_MAX);
1927 }
1928 }
1929 if (TARGET_MMX)
1930 cum->mmx_nregs = MMX_REGPARM_MAX;
1931 cum->warn_avx512f = true;
1932 cum->warn_avx = true;
1933 cum->warn_sse = true;
1934 cum->warn_mmx = true;
1935
1936 /* Because type might mismatch in between caller and callee, we need to
1937 use actual type of function for local calls.
1938 FIXME: cgraph_analyze can be told to actually record if function uses
1939 va_start so for local functions maybe_vaarg can be made aggressive
1940 helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
1942 if (local_info_node && local_info_node->local
1943 && local_info_node->can_change_signature)
1944 fntype = TREE_TYPE (target->decl);
1945 cum->stdarg = stdarg_p (fntype);
1946 cum->maybe_vaarg = (fntype
1947 ? (!prototype_p (fntype) || stdarg_p (fntype))
1948 : !libname);
1949
1950 cum->decl = fndecl;
1951
1952 cum->warn_empty = !warn_abi || cum->stdarg;
1953 if (!cum->warn_empty && fntype)
1954 {
1955 function_args_iterator iter;
1956 tree argtype;
1957 bool seen_empty_type = false;
1958 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1959 {
1960 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1961 break;
1962 if (TYPE_EMPTY_P (argtype))
1963 seen_empty_type = true;
1964 else if (seen_empty_type)
1965 {
1966 cum->warn_empty = true;
1967 break;
1968 }
1969 }
1970 }
1971
1972 if (!TARGET_64BIT)
1973 {
1974 /* If there are variable arguments, then we won't pass anything
1975 in registers in 32-bit mode. */
1976 if (stdarg_p (fntype))
1977 {
1978 cum->nregs = 0;
1979 /* Since in 32-bit, variable arguments are always passed on
1980 stack, there is scratch register available for indirect
1981 sibcall. */
1982 cfun->machine->arg_reg_available = true;
1983 cum->sse_nregs = 0;
1984 cum->mmx_nregs = 0;
1985 cum->warn_avx512f = false;
1986 cum->warn_avx = false;
1987 cum->warn_sse = false;
1988 cum->warn_mmx = false;
1989 return;
1990 }
1991
1992 /* Use ecx and edx registers if function has fastcall attribute,
1993 else look for regparm information. */
1994 if (fntype)
1995 {
1996 unsigned int ccvt = ix86_get_callcvt (type: fntype);
1997 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1998 {
1999 cum->nregs = 1;
2000 cum->fastcall = 1; /* Same first register as in fastcall. */
2001 }
2002 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
2003 {
2004 cum->nregs = 2;
2005 cum->fastcall = 1;
2006 }
2007 else
2008 cum->nregs = ix86_function_regparm (type: fntype, decl: fndecl);
2009 }
2010
2011 /* Set up the number of SSE registers used for passing SFmode
2012 and DFmode arguments. Warn for mismatching ABI. */
2013 cum->float_in_sse = ix86_function_sseregparm (type: fntype, decl: fndecl, warn: true);
2014 }
2015
2016 cfun->machine->arg_reg_available = (cum->nregs > 0);
2017}
2018
2019/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2020 But in the case of vector types, it is some vector mode.
2021
2022 When we have only some of our vector isa extensions enabled, then there
2023 are some modes for which vector_mode_supported_p is false. For these
2024 modes, the generic vector support in gcc will choose some non-vector mode
2025 in order to implement the type. By computing the natural mode, we'll
2026 select the proper ABI location for the operand and not depend on whatever
2027 the middle-end decides to do with these vector types.
2028
   The middle-end can't deal with vector types larger than 16 bytes.  In
   that case we return the original mode and warn about the ABI change if
   CUM isn't NULL.
2032
2033 If INT_RETURN is true, warn ABI change if the vector mode isn't
2034 available for function return value. */
2035
static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
		   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  /* Nothing to fix up unless the middle-end assigned a non-vector mode
     to a vector type (which it does when the required vector ISA is not
     enabled).  */
  if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (node: type) > 1)
	{
	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  /* There are no XFmode vector modes ...  */
	  if (innermode == XFmode)
	    return mode;

	  /* ... and no decimal float vector modes.  */
	  if (DECIMAL_FLOAT_MODE_P (innermode))
	    return mode;

	  /* Scan the vector modes starting from the smallest one of the
	     appropriate class (float vs integer element).  */
	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  FOR_EACH_MODE_FROM (mode, mode)
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (node: type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		/* Found the natural vector mode.  If the matching ISA is
		   disabled, the ABI location changed at some point, so
		   diagnose once per flavor under -Wpsabi.  The 64- and
		   32-byte cases must fall back to TYPE_MODE; the SSE and
		   MMX cases still return the natural mode below.  */
		if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
		  {
		    /* One-shot flags: warn at most once per translation
		       unit for arguments resp. return values.  */
		    static bool warnedavx512f;
		    static bool warnedavx512f_ret;

		    if (cum && cum->warn_avx512f && !warnedavx512f)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector argument "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f = true;
		      }
		    else if (in_return && !warnedavx512f_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector return "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
		  {
		    static bool warnedavx;
		    static bool warnedavx_ret;

		    if (cum && cum->warn_avx && !warnedavx)
		      {
			if (warning (OPT_Wpsabi, "AVX vector argument "
				     "without AVX enabled changes the ABI"))
			  warnedavx = true;
		      }
		    else if (in_return && !warnedavx_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX vector return "
				     "without AVX enabled changes the ABI"))
			  warnedavx_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (((size == 8 && TARGET_64BIT) || size == 16)
			 && !TARGET_SSE
			 && !TARGET_IAMCU)
		  {
		    static bool warnedsse;
		    static bool warnedsse_ret;

		    if (cum && cum->warn_sse && !warnedsse)
		      {
			if (warning (OPT_Wpsabi, "SSE vector argument "
				     "without SSE enabled changes the ABI"))
			  warnedsse = true;
		      }
		    /* SSE return warning is only relevant for the 32-bit
		       ABI; 64-bit always has SSE return registers.  */
		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
		      {
			if (warning (OPT_Wpsabi, "SSE vector return "
				     "without SSE enabled changes the ABI"))
			  warnedsse_ret = true;
		      }
		  }
		else if ((size == 8 && !TARGET_64BIT)
			 && (!cfun
			     || cfun->machine->func_type == TYPE_NORMAL)
			 && !TARGET_MMX
			 && !TARGET_IAMCU)
		  {
		    static bool warnedmmx;
		    static bool warnedmmx_ret;

		    if (cum && cum->warn_mmx && !warnedmmx)
		      {
			if (warning (OPT_Wpsabi, "MMX vector argument "
				     "without MMX enabled changes the ABI"))
			  warnedmmx = true;
		      }
		    else if (in_return && !warnedmmx_ret)
		      {
			if (warning (OPT_Wpsabi, "MMX vector return "
				     "without MMX enabled changes the ABI"))
			  warnedmmx_ret = true;
		      }
		  }
		/* For the SSE/MMX flavors the natural vector mode is
		   still the ABI mode.  */
		return mode;
	      }

	  /* A supported size with subparts > 1 must match some mode.  */
	  gcc_unreachable ();
	}
    }

  return mode;
}
2160
2161/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2162 this may not agree with the mode that the type system has chosen for the
2163 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2164 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2165
2166static rtx
2167gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2168 unsigned int regno)
2169{
2170 rtx tmp;
2171
2172 if (orig_mode != BLKmode)
2173 tmp = gen_rtx_REG (orig_mode, regno);
2174 else
2175 {
2176 tmp = gen_rtx_REG (mode, regno);
2177 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2178 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2179 }
2180
2181 return tmp;
2182}
2183
2184/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2185 of this code is to classify each 8bytes of incoming argument by the register
2186 class and assign registers accordingly. */
2187
2188/* Return the union class of CLASS1 and CLASS2.
2189 See the x86-64 PS ABI for details. */
2190
2191static enum x86_64_reg_class
2192merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2193{
2194 /* Rule #1: If both classes are equal, this is the resulting class. */
2195 if (class1 == class2)
2196 return class1;
2197
2198 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2199 the other class. */
2200 if (class1 == X86_64_NO_CLASS)
2201 return class2;
2202 if (class2 == X86_64_NO_CLASS)
2203 return class1;
2204
2205 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2206 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2207 return X86_64_MEMORY_CLASS;
2208
2209 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2210 if ((class1 == X86_64_INTEGERSI_CLASS
2211 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2212 || (class2 == X86_64_INTEGERSI_CLASS
2213 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2214 return X86_64_INTEGERSI_CLASS;
2215 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2216 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2217 return X86_64_INTEGER_CLASS;
2218
2219 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2220 MEMORY is used. */
2221 if (class1 == X86_64_X87_CLASS
2222 || class1 == X86_64_X87UP_CLASS
2223 || class1 == X86_64_COMPLEX_X87_CLASS
2224 || class2 == X86_64_X87_CLASS
2225 || class2 == X86_64_X87UP_CLASS
2226 || class2 == X86_64_COMPLEX_X87_CLASS)
2227 return X86_64_MEMORY_CLASS;
2228
2229 /* Rule #6: Otherwise class SSE is used. */
2230 return X86_64_SSE_CLASS;
2231}
2232
2233/* Classify the argument of type TYPE and mode MODE.
2234 CLASSES will be filled by the register class used to pass each word
2235 of the operand. The number of words is returned. In case the parameter
2236 should be passed in memory, 0 is returned. As a special case for zero
2237 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2238
2239 BIT_OFFSET is used internally for handling records and specifies offset
2240 of the offset in bits modulo 512 to avoid overflow cases.
2241
2242 See the x86-64 PS ABI for details.
2243*/
2244
static int
classify_argument (machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
		   int &zero_width_bitfields)
{
  /* BLKmode aggregates take their size from the tree node; scalar modes
     from the mode itself.  */
  HOST_WIDE_INT bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode)
    {
      /* The value of "named" doesn't matter.  */
      function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
      if (targetm.calls.must_pass_in_stack (arg))
	return 0;
    }

  if (type && (AGGREGATE_TYPE_P (type)
	       || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      if (integer_zerop (DECL_SIZE (field)))
			{
			  /* C++ zero-width bit-fields are ignored
			     unconditionally.  For C, record that one was
			     seen (== 1) unless the caller requested the
			     GCC 12.1 behavior (== 2), in which case the
			     field is classified as integer below; the
			     4-argument wrapper uses both runs to diagnose
			     the ABI difference.  */
			  if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
			    continue;
			  if (zero_width_bitfields != 2)
			    {
			      zero_width_bitfields = 1;
			      continue;
			    }
			}
		      /* Merge INTEGER into every 8-byte slot the
			 bit-field overlaps.  */
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_to_shwi (DECL_SIZE (field))
				+ 63) / 8 / 8; i++)
			classes[i]
			  = merge_classes (class1: X86_64_INTEGER_CLASS, class2: classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      /* Recurse on the field at its bit position (taken
			 modulo 512 to avoid overflow) and merge the
			 subclasses into the containing 8-byte slots.  */
		      num = classify_argument (TYPE_MODE (type), type,
					       classes: subclasses,
					       bit_offset: (int_bit_position (field)
					       + bit_offset) % 512,
					       zero_width_bitfields);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos]
			  = merge_classes (class1: subclasses[i], class2: classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), classes: subclasses, bit_offset,
				     zero_width_bitfields);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    /* Replicate the element classification over all words of
	       the array.  */
	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.
	     */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), classes: subclasses,
					   bit_offset, zero_width_bitfields);
		  if (!num)
		    return 0;
		  /* All members overlap, so merge at offset 0.  */
		  for (i = 0; i < num && i < words; i++)
		    classes[i] = merge_classes (class1: subclasses[i], class2: classes[i]);
		}
	    }
	  break;

	case BITINT_TYPE:
	  /* _BitInt(N) for N > 64 is passed as structure containing
	     (N + 63) / 64 64-bit elements.  */
	  if (words > 2)
	    return 0;
	  classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	  return 2;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with %<long double%>"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_UNIT_SIZE (mode) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case E_SDmode:
    case E_DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    case E_CSImode:
    case E_CHImode:
    case E_CQImode:
      {
	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);

	/* Analyze last 128 bits only.  */
	size = (size - 1) & 0x7f;

	if (size < 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size < 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size < 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size < 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case E_CDImode:
    case E_TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case E_COImode:
    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case E_CTImode:
      return 0;
    case E_HFmode:
    case E_BFmode:
      /* A 2-byte float at the start of an 8-byte is the partial
	 SSEHF class; elsewhere it degrades to plain SSE.  */
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSEHF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case E_XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case E_TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_HCmode:
    case E_BCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  classes[1] = X86_64_SSEHF_CLASS;
	  return 2;
	}
    case E_SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with %<complex float%>"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case E_DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case E_XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case E_TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 32-byte vectors: one SSE 8-byte plus three SSEUP fillers.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case E_V8DFmode:
    case E_V16SFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V8DImode:
    case E_V16SImode:
    case E_V32HImode:
    case E_V64QImode:
      /* 64-byte vectors: one SSE 8-byte plus seven SSEUP fillers.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      classes[4] = X86_64_SSEUP_CLASS;
      classes[5] = X86_64_SSEUP_CLASS;
      classes[6] = X86_64_SSEUP_CLASS;
      classes[7] = X86_64_SSEUP_CLASS;
      return 8;
    case E_V4SFmode:
    case E_V4SImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V2DFmode:
    case E_V2DImode:
      /* 16-byte vectors: SSE plus one SSEUP filler.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_V1TImode:
    case E_V1DImode:
    case E_V2SFmode:
    case E_V2SImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_BLKmode:
    case E_VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      /* Remaining small integer vectors are classified like plain
	 integers of the same size and offset.  */
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
2697
2698/* Wrapper around classify_argument with the extra zero_width_bitfields
2699 argument, to diagnose GCC 12.1 ABI differences for C. */
2700
2701static int
2702classify_argument (machine_mode mode, const_tree type,
2703 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2704{
2705 int zero_width_bitfields = 0;
2706 static bool warned = false;
2707 int n = classify_argument (mode, type, classes, bit_offset,
2708 zero_width_bitfields);
2709 if (!zero_width_bitfields || warned || !warn_psabi)
2710 return n;
2711 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2712 zero_width_bitfields = 2;
2713 if (classify_argument (mode, type, classes: alt_classes, bit_offset,
2714 zero_width_bitfields) != n)
2715 zero_width_bitfields = 3;
2716 else
2717 for (int i = 0; i < n; i++)
2718 if (classes[i] != alt_classes[i])
2719 {
2720 zero_width_bitfields = 3;
2721 break;
2722 }
2723 if (zero_width_bitfields == 3)
2724 {
2725 warned = true;
2726 const char *url
2727 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2728
2729 inform (input_location,
2730 "the ABI of passing C structures with zero-width bit-fields"
2731 " has changed in GCC %{12.1%}", url);
2732 }
2733 return n;
2734}
2735
2736/* Examine the argument and return set number of register required in each
2737 class. Return true iff parameter should be passed in memory. */
2738
2739static bool
2740examine_argument (machine_mode mode, const_tree type, int in_return,
2741 int *int_nregs, int *sse_nregs)
2742{
2743 enum x86_64_reg_class regclass[MAX_CLASSES];
2744 int n = classify_argument (mode, type, classes: regclass, bit_offset: 0);
2745
2746 *int_nregs = 0;
2747 *sse_nregs = 0;
2748
2749 if (!n)
2750 return true;
2751 for (n--; n >= 0; n--)
2752 switch (regclass[n])
2753 {
2754 case X86_64_INTEGER_CLASS:
2755 case X86_64_INTEGERSI_CLASS:
2756 (*int_nregs)++;
2757 break;
2758 case X86_64_SSE_CLASS:
2759 case X86_64_SSEHF_CLASS:
2760 case X86_64_SSESF_CLASS:
2761 case X86_64_SSEDF_CLASS:
2762 (*sse_nregs)++;
2763 break;
2764 case X86_64_NO_CLASS:
2765 case X86_64_SSEUP_CLASS:
2766 break;
2767 case X86_64_X87_CLASS:
2768 case X86_64_X87UP_CLASS:
2769 case X86_64_COMPLEX_X87_CLASS:
2770 if (!in_return)
2771 return true;
2772 break;
2773 case X86_64_MEMORY_CLASS:
2774 gcc_unreachable ();
2775 }
2776
2777 return false;
2778}
2779
2780/* Construct container for the argument used by GCC interface. See
2781 FUNCTION_ARG for the detailed description. */
2782
static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state, so each
     diagnostic is emitted at most once per translation unit.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  /* NULL from here on means "pass/return in memory".  */
  n = classify_argument (mode, type, classes: regclass, bit_offset: 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, int_nregs: &needed_intregs,
			sse_nregs: &needed_sseregs))
    return NULL;
  /* Not enough free registers of the required kinds remain.  */
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs
      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    {
      /* Return early if we shouldn't raise an error for invalid
	 calls.  */
      if (cfun != NULL && cfun->machine->silent_p)
	return NULL;
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      if (VALID_SSE2_TYPE_MODE (mode))
		error ("SSE register return with SSE2 disabled");
	      else
		error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  if (VALID_SSE2_TYPE_MODE (mode))
	    error ("SSE register argument with SSE2 disabled");
	  else
	    error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  /* Return early if we shouldn't raise an error for invalid
	     calls.  */
	  if (cfun != NULL && cfun->machine->silent_p)
	    return NULL;
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode && mode != HCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  /* SSE followed only by SSEUP fillers: the whole 16/32/64-byte value
     lives in one SSE register.  */
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  /* long double occupies st(0) as a whole (X87 + X87UP halves).  */
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  /* Two consecutive integer registers can carry a 16-byte integer
     value directly.  */
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    {
	      /* Last partial 8-byte: use the smallest integer mode that
		 covers the remaining bytes.  */
	      unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
	      if (!int_mode_for_size (size: tmpbits, limit: 0).exists (mode: &tmpmode))
		/* We've requested 24 bytes we
		   don't have mode for.  Use DImode.  */
		tmpmode = DImode;
	    }
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSEHF_CLASS:
	  /* Preserve BFmode vs HFmode distinction for 2-byte floats.  */
	  tmpmode = (mode == BFmode ? BFmode : HFmode);
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* An SSE 8-byte optionally followed by SSEUP fillers; consume
	     the fillers too and pick a mode wide enough for all.  */
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    case 8:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS
			  && regclass[4] == X86_64_SSEUP_CLASS
			  && regclass[5] == X86_64_SSEUP_CLASS
			  && regclass[6] == X86_64_SSEUP_CLASS
			  && regclass[7] == X86_64_SSEUP_CLASS);
	      tmpmode = XImode;
	      i += 7;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
3055
3056/* Update the data in CUM to advance over an argument of mode MODE
3057 and data type TYPE. (TYPE is null for libcalls where that information
3058 may not be available.)
3059
   Return a number of integer registers advanced over.  */
3061
static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  /* Number of integer registers consumed by this argument.  */
  int res = 0;
  /* Set when the SSE calling convention is required but SSE is disabled;
     the diagnostic is emitted at the end so the register bookkeeping
     above it still runs.  */
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* A BLKmode argument with unknown (negative) size is not passed
	 in registers.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      /* Integer-class argument: consume WORDS general registers.  */
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      /* Only report registers consumed if the argument fit entirely;
	 cum->nregs may have gone negative above.  */
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  /* Integer registers exhausted; remaining arguments go on
	     the stack.  */
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* SSE-class argument: only non-aggregates consume an SSE
	 register here.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* MMX-class argument: likewise restricted to non-aggregates.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return res;
}
3194
3195static int
3196function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3197 const_tree type, HOST_WIDE_INT words, bool named)
3198{
3199 int int_nregs, sse_nregs;
3200
3201 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3202 if (!named && (VALID_AVX512F_REG_MODE (mode)
3203 || VALID_AVX256_REG_MODE (mode)))
3204 return 0;
3205
3206 if (!examine_argument (mode, type, in_return: 0, int_nregs: &int_nregs, sse_nregs: &sse_nregs)
3207 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3208 {
3209 cum->nregs -= int_nregs;
3210 cum->sse_nregs -= sse_nregs;
3211 cum->regno += int_nregs;
3212 cum->sse_regno += sse_nregs;
3213 return int_nregs;
3214 }
3215 else
3216 {
3217 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3218 cum->words = ROUND_UP (cum->words, align);
3219 cum->words += words;
3220 return 0;
3221 }
3222}
3223
3224static int
3225function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3226 HOST_WIDE_INT words)
3227{
3228 /* Otherwise, this should be passed indirect. */
3229 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3230
3231 cum->words += words;
3232 if (cum->nregs > 0)
3233 {
3234 cum->nregs -= 1;
3235 cum->regno += 1;
3236 return 1;
3237 }
3238 return 0;
3239}
3240
3241/* Update the data in CUM to advance over argument ARG. */
3242
static void
ix86_function_arg_advance (cumulative_args_t cum_v,
			   const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  int nregs;

  /* The argument of interrupt handler is a special case and is
     handled in ix86_function_arg.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    return;

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* Re-derive the natural mode from the type so aggregates and
     vectors are classified consistently with ix86_function_arg.  */
  if (arg.type)
    mode = type_natural_mode (arg.type, NULL, false);

  /* Dispatch on the ABI in effect: MS x64, SysV x86-64, or ia32.  */
  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	nregs = function_arg_advance_ms_64 (cum, bytes, words);
      else
	nregs = function_arg_advance_64 (cum, mode, arg.type, words,
					 arg.named);
    }
  else
    nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);

  if (!nregs)
    {
      /* Track if there are outgoing arguments on stack.  */
      if (cum->caller)
	cfun->machine->outgoing_args_on_stack = true;
    }
}
3283
3284/* Define where to put the arguments to a function.
3285 Value is zero to push the argument on the stack,
3286 or a hard register in which to store the argument.
3287
3288 MODE is the argument's machine mode.
3289 TYPE is the data type of the argument (as a tree).
3290 This is null for libcalls where that information may
3291 not be available.
3292 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3293 the preceding args and about the function being called.
3294 NAMED is nonzero if this argument is a named parameter
3295 (otherwise it is an extra parameter matching an ellipsis). */
3296
static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  /* Set when the SSE calling convention is required but SSE is disabled;
     diagnosed once at the end.  */
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      /* Integer-class argument: pass in a general register when the
	 whole argument still fits in the remaining registers.  */
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* 128-bit SSE-class argument: non-aggregates only.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 256/512-bit vector argument: same rule as the 128-bit case.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 64-bit MMX-class argument: non-aggregates only.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return NULL_RTX;
}
3439
/* Return the register (or PARALLEL) in which an argument of MODE/TYPE
   is passed under the 64-bit System V ABI, or NULL to pass it on the
   stack.  ORIG_MODE is the mode before type_natural_mode adjustment;
   NAMED is false for arguments matching an ellipsis.  */

static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  /* The preserve_none ABI uses its own integer argument register
     order.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &parm_regs[cum->regno],
			      cum->sse_regno);
}
3491
/* Return the register in which an argument of MODE/TYPE and size BYTES
   is passed under the Microsoft x64 ABI, or NULL_RTX to pass it on the
   stack.  ORIG_MODE is the pre-adjustment mode; NAMED is false for
   arguments matching an ellipsis.  */

static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		    machine_mode orig_mode, bool named, const_tree type,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes less than 64 bits are passed in anything but
     integer regs.  Larger floating point types are excluded as the Windows
     ABI requires vreg args can be shadowed in GPRs (for red zone / varargs).  */
  if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
    {
      if (named)
	{
	  /* Named scalar FP argument: use the SSE register of the same
	     ordinal as the integer slot.  */
	  if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
	    regno = cum->regno + FIRST_SSE_REG;
	}
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      /* BYTES outside 1..8 shouldn't reach here (passed by reference);
	 default to DImode in that case.  */
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
3544
3545/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register
   in which to store the argument.
3547
3548 ARG describes the argument while CUM gives information about the
3549 preceding args and about the function being called. */
3550
static rtx
ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  rtx reg;

  /* Interrupt/exception handlers receive their arguments at fixed
     stack slots rather than via the normal calling convention.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (arg.type != NULL_TREE);
      if (POINTER_TYPE_P (arg.type))
	{
	  /* This is the pointer argument.  */
	  gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
	  /* It is at -WORD(AP) in the current frame in interrupt and
	     exception handlers.  */
	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
	}
      else
	{
	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
		      && TREE_CODE (arg.type) == INTEGER_TYPE
		      && TYPE_MODE (arg.type) == word_mode);
	  /* The error code is the word-mode integer argument at
	     -2 * WORD(AP) in the current frame of the exception
	     handler.  */
	  reg = gen_rtx_MEM (word_mode,
			     plus_constant (Pmode,
					    arg_pointer_rtx,
					    -2 * UNITS_PER_WORD));
	}
      return reg;
    }

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (arg.type && VECTOR_TYPE_P (arg.type))
    mode = type_natural_mode (arg.type, cum, false);

  /* Dispatch on the ABI in effect: MS x64, SysV x86-64, or ia32.  */
  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
				  arg.type, bytes);
      else
	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    }
  else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (reg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return reg;
}
3613
3614/* A C expression that indicates when an argument must be passed by
3615 reference. If nonzero for an argument, a copy of that argument is
3616 made in memory and a pointer to the argument is passed instead of
3617 the argument itself. The pointer is passed in whatever way is
3618 appropriate for passing a pointer to that type. */
3619
static bool
ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      /* See Windows x64 Software Convention.  */
      if (call_abi == MS_ABI)
	{
	  HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);

	  if (tree type = arg.type)
	    {
	      /* Arrays are passed by reference.  */
	      if (TREE_CODE (type) == ARRAY_TYPE)
		return true;

	      if (RECORD_OR_UNION_TYPE_P (type))
		{
		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		     are passed by reference.  */
		  msize = int_size_in_bytes (type);
		}
	    }

	  /* __m128 is passed by reference.  */
	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
	}
      /* SysV x86-64: variable-sized types (int_size_in_bytes == -1)
	 are passed by reference.  */
      else if (arg.type && int_size_in_bytes (arg.type) == -1)
	return true;
    }

  return false;
}
3657
3658/* Return true when TYPE should be 128bit aligned for 32bit argument
3659 passing ABI. XXX: This function is obsolete and is only used for
3660 checking psABI compatibility with previous versions of GCC. */
3661
static bool
ix86_compat_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);
  /* SSE modes and 128-bit FP/decimal modes are 128-bit aligned unless
     the user lowered the alignment explicitly.  */
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
3708
3709/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3710 XXX: This function is obsolete and is only used for checking psABI
3711 compatibility with previous versions of GCC. */
3712
static unsigned int
ix86_compat_function_arg_boundary (machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  /* No type available: decide from the mode alone.  */
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  /* Never exceed the maximum supported alignment.  */
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
3743
3744/* Return true when TYPE should be 128bit aligned for 32bit argument
3745 passing ABI. */
3746
static bool
ix86_contains_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  /* XFmode/XCmode (80-bit FP) never forces 128-bit alignment.  */
  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    /* Scalar: its own alignment decides.  */
    return TYPE_ALIGN (type) >= 128;

  return false;
}
3796
3797/* Gives the alignment boundary, in bits, of an argument with the
3798 specified mode and type. */
3799
static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      /* Empty types take no space, so the default boundary is enough.  */
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  /* On ia32, boundaries between PARM_BOUNDARY and 128 bits are
	     not used; fall back to the default.  */
	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      /* Warn (once) if the alignment differs from what pre-4.6 GCC
	 would have used, since that is an ABI change.  */
      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
3852
3853/* Return true if N is a possible register number of function value. */
3854
3855static bool
3856ix86_function_value_regno_p (const unsigned int regno)
3857{
3858 switch (regno)
3859 {
3860 case AX_REG:
3861 return true;
3862 case DX_REG:
3863 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3864 case DI_REG:
3865 case SI_REG:
3866 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3867
3868 /* Complex values are returned in %st(0)/%st(1) pair. */
3869 case ST0_REG:
3870 case ST1_REG:
3871 /* TODO: The function should depend on current function ABI but
3872 builtins.cc would need updating then. Therefore we use the
3873 default ABI. */
3874 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3875 return false;
3876 return TARGET_FLOAT_RETURNS_IN_80387;
3877
3878 /* Complex values are returned in %xmm0/%xmm1 pair. */
3879 case XMM0_REG:
3880 case XMM1_REG:
3881 return TARGET_SSE;
3882
3883 case MM0_REG:
3884 if (TARGET_MACHO || TARGET_64BIT)
3885 return false;
3886 return TARGET_MMX;
3887 }
3888
3889 return false;
3890}
3891
3892/* Check whether the register REGNO should be zeroed on X86.
3893 When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3894 together, no need to zero it again.
3895 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3896
3897static bool
3898zero_call_used_regno_p (const unsigned int regno,
3899 bool all_sse_zeroed,
3900 bool need_zero_mmx)
3901{
3902 return GENERAL_REGNO_P (regno)
3903 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3904 || MASK_REGNO_P (regno)
3905 || (need_zero_mmx && MMX_REGNO_P (regno));
3906}
3907
3908/* Return the machine_mode that is used to zero register REGNO. */
3909
3910static machine_mode
3911zero_call_used_regno_mode (const unsigned int regno)
3912{
3913 /* NB: We only need to zero the lower 32 bits for integer registers
3914 and the lower 128 bits for vector registers since destination are
3915 zero-extended to the full register width. */
3916 if (GENERAL_REGNO_P (regno))
3917 return SImode;
3918 else if (SSE_REGNO_P (regno))
3919 return V4SFmode;
3920 else if (MASK_REGNO_P (regno))
3921 return HImode;
3922 else if (MMX_REGNO_P (regno))
3923 return V2SImode;
3924 else
3925 gcc_unreachable ();
3926}
3927
3928/* Generate a rtx to zero all vector registers together if possible,
3929 otherwise, return NULL. */
3930
3931static rtx
3932zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3933{
3934 if (!TARGET_AVX)
3935 return NULL;
3936
3937 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3938 if ((LEGACY_SSE_REGNO_P (regno)
3939 || (TARGET_64BIT
3940 && (REX_SSE_REGNO_P (regno)
3941 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3942 && !TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno))
3943 return NULL;
3944
3945 return gen_avx_vzeroall ();
3946}
3947
3948/* Generate insns to zero all st registers together.
3949 Return true when zeroing instructions are generated.
3950 Assume the number of st registers that are zeroed is num_of_st,
3951 we will emit the following sequence to zero them together:
3952 fldz; \
3953 fldz; \
3954 ...
3955 fldz; \
3956 fstp %%st(0); \
3957 fstp %%st(0); \
3958 ...
3959 fstp %%st(0);
3960 i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3961 mark stack slots empty.
3962
3963 How to compute the num_of_st:
3964 There is no direct mapping from stack registers to hard register
3965 numbers. If one stack register needs to be cleared, we don't know
3966 where in the stack the value remains. So, if any stack register
3967 needs to be cleared, the whole stack should be cleared. However,
3968 x87 stack registers that hold the return value should be excluded.
3969 x87 returns in the top (two for complex values) register, so
3970 num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3971 return the value of num_of_st. */
3972
3973
static int
zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
{

  /* If the FPU is disabled, no need to zero all st registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    return 0;

  /* The loop only detects whether ANY x87/MMX register needs zeroing
     (it breaks after the first hit); the real count is computed
     below from the return-value shape.  */
  unsigned int num_of_st = 0;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	num_of_st++;
	break;
      }

  if (num_of_st == 0)
    return 0;

  bool return_with_x87 = false;
  return_with_x87 = (crtl->return_rtx
		     && (STACK_REG_P (crtl->return_rtx)));

  bool complex_return = false;
  complex_return = (crtl->return_rtx
		    && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));

  /* x87 returns occupy the top one (scalar) or two (complex) stack
     slots, which must not be clobbered.  */
  if (return_with_x87)
    if (complex_return)
      num_of_st = 6;
    else
      num_of_st = 7;
  else
    num_of_st = 8;

  /* Emit num_of_st fldz insns to push zeros...  */
  rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
  for (unsigned int i = 0; i < num_of_st; i++)
    emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));

  /* ...then num_of_st fstp %st(0) insns (modelled as self-sets with a
     REG_DEAD note) to pop them and mark the slots empty again.  */
  for (unsigned int i = 0; i < num_of_st; i++)
    {
      rtx insn;
      insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
      add_reg_note (insn, REG_DEAD, st_reg);
    }
  return num_of_st;
}
4022
4023
4024/* When the routine exit in MMX mode, if any ST register needs
4025 to be zeroed, we should clear all MMX registers except the
4026 RET_MMX_REGNO that holds the return value. */
4027static bool
4028zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
4029 unsigned int ret_mmx_regno)
4030{
4031 bool need_zero_all_mm = false;
4032 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4033 if (STACK_REGNO_P (regno)
4034 && TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno))
4035 {
4036 need_zero_all_mm = true;
4037 break;
4038 }
4039
4040 if (!need_zero_all_mm)
4041 return false;
4042
4043 machine_mode mode = V2SImode;
4044 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4045 if (regno != ret_mmx_regno)
4046 {
4047 rtx reg = gen_rtx_REG (mode, regno);
4048 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
4049 }
4050 return true;
4051}
4052
4053/* TARGET_ZERO_CALL_USED_REGS. */
4054/* Generate a sequence of instructions that zero registers specified by
4055 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
4056 zeroed. */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
    }

  /* mm/st registers are shared registers set, we should follow the following
     rules to clear them:
			MMX exit mode	      x87 exit mode
	-------------|----------------------|---------------
	uses x87 reg | clear all MMX	    | clear all x87
	uses MMX reg | clear individual MMX | clear all x87
	x87 + MMX    | clear all MMX	    | clear all x87

      first, we should decide which mode (MMX mode or x87 mode) the function
      exit with.  */

  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      if (all_st_zeroed_num > 0)
	for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
	  /* x87 stack registers that hold the return value should be excluded.
	     x87 returns in the top (two for complex values) register.  */
	  if (all_st_zeroed_num == 8
	      || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
		   || (all_st_zeroed_num == 6
		       && (regno == (REGNO (crtl->return_rtx) + 1)))))
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
	continue;
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (zeroed_hardregs, regno);

      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
	{
	case E_SImode:
	  /* Prefer xor (which clobbers the flags) unless mov0 is
	     cheaper on this tuning and we are optimizing for speed.  */
	  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
	    {
	      rtx clob = gen_rtx_CLOBBER (VOIDmode,
					  gen_rtx_REG (CCmode,
						       FLAGS_REG));
	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							   tmp,
							   clob));
	    }
	  /* FALLTHRU.  */

	case E_V4SFmode:
	case E_HImode:
	case E_V2SImode:
	  emit_insn (tmp);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return zeroed_hardregs;
}
4159
4160/* Define how to find the value returned by a function.
4161 VALTYPE is the data type of the value (as a tree).
4162 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4163 otherwise, FUNC is 0. */
4164
static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  /* Register number the value will be returned in; refined below.  */
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Return __bf16/_Float16/_Complex _Float16 by sse register.  */
  if (mode == HFmode || mode == BFmode)
    {
      if (!TARGET_SSE2)
	{
	  /* Diagnose, then fall back to %eax so compilation can
	     continue after the error.  */
	  error ("SSE register return with SSE2 disabled");
	  regno = AX_REG;
	}
      else
	regno = FIRST_SSE_REG;
    }

  if (mode == HCmode)
    {
      if (!TARGET_SSE2)
	error ("SSE register return with SSE2 disabled");

      /* _Complex _Float16 is returned as a single SImode chunk at
	 offset 0, wrapped in a one-element PARALLEL.  */
      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
      XVECEXP (ret, 0, 0)
	= gen_rtx_EXPR_LIST (VOIDmode,
			     gen_rtx_REG (SImode,
					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
			     GEN_INT (0));
      return ret;
    }

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (type: fntype, decl: fn, warn: false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled", fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called");
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
4247
/* Worker for ix86_function_value_1: return-value location for the
   64-bit SysV ABI.  ORIG_MODE is the declared mode, MODE the natural
   mode; VALTYPE is the value's type node, or NULL for a libcall.  */

static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  The register
     is chosen from MODE alone.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case E_BFmode:
	case E_HFmode:
	case E_HCmode:
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case E_TCmode:
	  /* Complex long double libcalls have no register home.  */
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, type: valtype, in_return: 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     intreg: x86_64_int_return_registers, sse_regno: 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
4303
4304static rtx
4305function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4306 const_tree fntype, const_tree fn, const_tree valtype)
4307{
4308 unsigned int regno;
4309
4310 /* Floating point return values in %st(0)
4311 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4312 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4313 && (GET_MODE_SIZE (mode) > 8
4314 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4315 {
4316 regno = FIRST_FLOAT_REG;
4317 return gen_rtx_REG (orig_mode, regno);
4318 }
4319 else
4320 return function_value_32(orig_mode, mode, fntype,fn);
4321}
4322
4323static rtx
4324function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4325 const_tree valtype)
4326{
4327 unsigned int regno = AX_REG;
4328
4329 if (TARGET_SSE)
4330 {
4331 switch (GET_MODE_SIZE (mode))
4332 {
4333 case 16:
4334 if (valtype != NULL_TREE
4335 && !VECTOR_INTEGER_TYPE_P (valtype)
4336 && !INTEGRAL_TYPE_P (valtype)
4337 && !VECTOR_FLOAT_TYPE_P (valtype))
4338 break;
4339 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4340 && !COMPLEX_MODE_P (mode))
4341 regno = FIRST_SSE_REG;
4342 break;
4343 case 8:
4344 case 4:
4345 case 2:
4346 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4347 break;
4348 if (mode == HFmode || mode == SFmode || mode == DFmode)
4349 regno = FIRST_SSE_REG;
4350 break;
4351 default:
4352 break;
4353 }
4354 }
4355 return gen_rtx_REG (orig_mode, regno);
4356}
4357
4358static rtx
4359ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4360 machine_mode orig_mode, machine_mode mode)
4361{
4362 const_tree fn, fntype;
4363
4364 fn = NULL_TREE;
4365 if (fntype_or_decl && DECL_P (fntype_or_decl))
4366 fn = fntype_or_decl;
4367 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4368
4369 if (ix86_function_type_abi (fntype) == MS_ABI)
4370 {
4371 if (TARGET_64BIT)
4372 return function_value_ms_64 (orig_mode, mode, valtype);
4373 else
4374 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4375 }
4376 else if (TARGET_64BIT)
4377 return function_value_64 (orig_mode, mode, valtype);
4378 else
4379 return function_value_32 (orig_mode, mode, fntype, fn);
4380}
4381
4382static rtx
4383ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4384{
4385 machine_mode mode, orig_mode;
4386
4387 orig_mode = TYPE_MODE (valtype);
4388 mode = type_natural_mode (type: valtype, NULL, in_return: true);
4389 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4390}
4391
4392/* Pointer function arguments and return values are promoted to
4393 word_mode for normal functions. */
4394
4395static machine_mode
4396ix86_promote_function_mode (const_tree type, machine_mode mode,
4397 int *punsignedp, const_tree fntype,
4398 int for_return)
4399{
4400 if (cfun->machine->func_type == TYPE_NORMAL
4401 && type != NULL_TREE
4402 && POINTER_TYPE_P (type))
4403 {
4404 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4405 return word_mode;
4406 }
4407 return default_promote_function_mode (type, mode, punsignedp, fntype,
4408 for_return);
4409}
4410
4411/* Return true if a structure, union or array with MODE containing FIELD
4412 should be accessed using BLKmode. */
4413
4414static bool
4415ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4416{
4417 /* Union with XFmode must be in BLKmode. */
4418 return (mode == XFmode
4419 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4420 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4421}
4422
4423rtx
4424ix86_libcall_value (machine_mode mode)
4425{
4426 return ix86_function_value_1 (NULL, NULL, orig_mode: mode, mode);
4427}
4428
4429/* Return true iff type is returned in memory. */
4430
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, in_return: true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  int needed_intregs, needed_sseregs;

	  /* SysV: in memory exactly when the psABI classifier cannot
	     place the value in registers.  */
	  return examine_argument (mode, type, in_return: 1,
				   int_nregs: &needed_intregs, sse_nregs: &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */

	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F;
	}

      /* long double is returned in %st(0), never in memory.  */
      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
4515
4516/* Implement TARGET_PUSH_ARGUMENT. */
4517
4518static bool
4519ix86_push_argument (unsigned int npush)
4520{
4521 /* If SSE2 is available, use vector move to put large argument onto
4522 stack. NB: In 32-bit mode, use 8-byte vector move. */
4523 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4524 && TARGET_PUSH_ARGS
4525 && !ACCUMULATE_OUTGOING_ARGS);
4526}
4527
4528
4529/* Create the va_list data type. */
4530
/* Build the SysV 64-bit va_list record type:
     struct __va_list_tag {
       unsigned gp_offset;        offset into reg_save_area for GPRs
       unsigned fp_offset;        offset into reg_save_area for FPRs
       void *overflow_arg_area;   next stack-passed argument
       void *reg_save_area;       register save area
     };
   The returned type is a one-element array of that record.  */

static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  /* Remember the counter fields so optimizers can track how much of
     the save area a function actually uses.  */
  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  /* Chain the fields onto the record in declaration order.  */
  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* Tag the record so canonical_va_list_type can identify it even
     across LTO type merging.  */
  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
4576
4577/* Setup the builtin va_list data type and for 64-bit the additional
4578 calling convention specific va_list data types. */
4579
4580static tree
4581ix86_build_builtin_va_list (void)
4582{
4583 if (TARGET_64BIT)
4584 {
4585 /* Initialize ABI specific va_list builtin types.
4586
4587 In lto1, we can encounter two va_list types:
4588 - one as a result of the type-merge across TUs, and
4589 - the one constructed here.
4590 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4591 a type identity check in canonical_va_list_type based on
4592 TYPE_MAIN_VARIANT (which we used to have) will not work.
4593 Instead, we tag each va_list_type_node with its unique attribute, and
4594 look for the attribute in the type identity check in
4595 canonical_va_list_type.
4596
4597 Tagging sysv_va_list_type_node directly with the attribute is
4598 problematic since it's a array of one record, which will degrade into a
4599 pointer to record when used as parameter (see build_va_arg comments for
4600 an example), dropping the attribute in the process. So we tag the
4601 record instead. */
4602
4603 /* For SYSV_ABI we use an array of one record. */
4604 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4605
4606 /* For MS_ABI we use plain pointer to argument area. */
4607 tree char_ptr_type = build_pointer_type (char_type_node);
4608 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4609 TYPE_ATTRIBUTES (char_ptr_type));
4610 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4611
4612 return ((ix86_abi == MS_ABI)
4613 ? ms_va_list_type_node
4614 : sysv_va_list_type_node);
4615 }
4616 else
4617 {
4618 /* For i386 we use plain pointer to argument area. */
4619 return build_pointer_type (char_type_node);
4620 }
4621}
4622
4623/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4624
static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  /* Nothing to save at all.  */
  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Save only the GPRs not consumed by named arguments, up to the
     size the function's va_list usage requires.  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  /* Pick the parameter-register ordering matching the calling
     convention in effect.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode, parm_regs[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      /* Save only the SSE registers not consumed by named arguments.  */
      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  /* FPR slots start right after the GPR portion of the save
	     area, 16 bytes per register.  */
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
4713
4714static void
4715setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4716{
4717 alias_set_type set = get_varargs_alias_set ();
4718 int i;
4719
4720 /* Reset to zero, as there might be a sysv vaarg used
4721 before. */
4722 ix86_varargs_gpr_size = 0;
4723 ix86_varargs_fpr_size = 0;
4724
4725 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4726 {
4727 rtx reg, mem;
4728
4729 mem = gen_rtx_MEM (Pmode,
4730 plus_constant (Pmode, virtual_incoming_args_rtx,
4731 i * UNITS_PER_WORD));
4732 MEM_NOTRAP_P (mem) = 1;
4733 set_mem_alias_set (mem, set);
4734
4735 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4736 emit_move_insn (mem, reg);
4737 }
4738}
4739
4740static void
4741ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4742 const function_arg_info &arg,
4743 int *, int no_rtl)
4744{
4745 CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v);
4746 CUMULATIVE_ARGS next_cum;
4747 tree fntype;
4748
4749 /* This argument doesn't appear to be used anymore. Which is good,
4750 because the old code here didn't suppress rtl generation. */
4751 gcc_assert (!no_rtl);
4752
4753 if (!TARGET_64BIT)
4754 return;
4755
4756 fntype = TREE_TYPE (current_function_decl);
4757
4758 /* For varargs, we do not want to skip the dummy va_dcl argument.
4759 For stdargs, we do want to skip the last named argument. */
4760 next_cum = *cum;
4761 if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4762 || arg.type != NULL_TREE)
4763 && stdarg_p (fntype))
4764 ix86_function_arg_advance (cum_v: pack_cumulative_args (arg: &next_cum), arg);
4765
4766 if (cum->call_abi == MS_ABI)
4767 setup_incoming_varargs_ms_64 (&next_cum);
4768 else
4769 setup_incoming_varargs_64 (&next_cum);
4770}
4771
/* Check whether TYPE is a va_list type represented as a plain char pointer.  */
4773
4774static bool
4775is_va_list_char_pointer (tree type)
4776{
4777 tree canonic;
4778
4779 /* For 32-bit it is always true. */
4780 if (!TARGET_64BIT)
4781 return true;
4782 canonic = ix86_canonical_va_list_type (type);
4783 return (canonic == ms_va_list_type_node
4784 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4785}
4786
4787/* Implement va_start. */
4788
static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  /* Build the copy in a detached sequence, then place it at
	     the very start of the function, after the prologue.  */
	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  /* Same as the standard expansion, but based at the saved
	     old-stack argument pointer instead of
	     internal_arg_pointer.  */
	  rtx va_r, next;

	  va_r = expand_expr (exp: valist, NULL_RTX, VOIDmode, modifier: EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  /* SysV 64-bit: fill in all four fields of the va_list record.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      /* gp_offset = bytes of GPR save area consumed by named args.  */
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      /* fp_offset starts past the whole GPR portion of the save
	 area, plus 16 bytes per named SSE register.  */
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }
}
4917
4918/* Implement va_arg. */
4919
static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  /* Identity map for the integer argument registers: the i-th GPR slot
     of the register save area holds the i-th parameter register.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* Fields of the SysV va_list record, in declaration order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);

  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference fetch a pointer; dereference at the
     end (see indirect_p below).  */
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, in_return: false);
  switch (nat_mode)
    {
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (mode: nat_mode, TYPE_MODE (type),
				       type, in_return: 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       sse_regno: 0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  type_align = TYPE_ALIGN (type);

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      /* lab_false: take the overflow (stack) path;
	 lab_over: join point after loading from registers.  */
      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (mode: nat_mode, type, in_return: 0, int_nregs: &needed_intregs, sse_nregs: &needed_sseregs);

      bool container_in_reg = false;
      if (REG_P (container))
	container_in_reg = true;
      else if (GET_CODE (container) == PARALLEL
	       && GET_MODE (container) == BLKmode
	       && XVECLEN (container, 0) == 1)
	{
	  /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
	     expression in a TImode register.  In this case, temp isn't
	     needed.  Otherwise, the TImode variable will be put in the
	     GPR save area which guarantees only 8-byte alignment.  */
	  rtx x = XVECEXP (container, 0, 0);
	  if (GET_CODE (x) == EXPR_LIST
	      && REG_P (XEXP (x, 0))
	      && XEXP (x, 1) == const0_rtx)
	    container_in_reg = true;
	}

      /* A temporary is needed when the save area cannot provide the
	 alignment the type requires.  */
      need_temp = (!container_in_reg
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !container_in_reg)
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  /* Copy the value piecewise out of the save area into an
	     adequately-aligned temporary, then read from there.  */
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");
	  TREE_ADDRESSABLE (temp) = 1;

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  /* Trailing piece: shrink to the bytes that remain.  */
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (size: nbits, limit: 1).exists (mode: &mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  /* Partial piece: copy the exact byte count.  */
		  tree copy
		    = build_call_expr (builtin_decl_implicit (fncode: BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  /* Bump gp_offset past the registers just consumed.  */
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	  /* The GPR save area guarantees only 8-byte alignment.  */
	  if (!need_temp)
	    type_align = MIN (type_align, 64);
	}

      if (needed_sseregs)
	{
	  /* Bump fp_offset past the registers just consumed.  */
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (dest: lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (label: lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      /* Round the overflow pointer up to the argument boundary.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Advance the overflow pointer past this argument.  */
  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (label: lab_over));

  /* Load the value through a pointer carrying the alignment actually
     guaranteed by whichever path produced ADDR.  */
  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
5242
5243/* Return true if OPNUM's MEM should be matched
5244 in movabs* patterns. */
5245
5246bool
5247ix86_check_movabs (rtx insn, int opnum)
5248{
5249 rtx set, mem;
5250
5251 set = PATTERN (insn);
5252 if (GET_CODE (set) == PARALLEL)
5253 set = XVECEXP (set, 0, 0);
5254 gcc_assert (GET_CODE (set) == SET);
5255 mem = XEXP (set, opnum);
5256 while (SUBREG_P (mem))
5257 mem = SUBREG_REG (mem);
5258 gcc_assert (MEM_P (mem));
5259 return volatile_ok || !MEM_VOLATILE_P (mem);
5260}
5261
/* Return true if XVECEXP idx of INSN satisfies MOVS arguments.  */
bool
ix86_check_movs (rtx insn, int idx)
{
  /* A movs insn is a PARALLEL; element IDX must be a mem-to-mem SET.  */
  rtx pat = PATTERN (insn);
  gcc_assert (GET_CODE (pat) == PARALLEL);

  rtx set = XVECEXP (pat, 0, idx);
  gcc_assert (GET_CODE (set) == SET);

  rtx dst = SET_DEST (set);
  gcc_assert (MEM_P (dst));

  rtx src = SET_SRC (set);
  gcc_assert (MEM_P (src));

  /* The destination must be in the generic address space; the source
     may be in a non-generic address space only when Pmode == word_mode
     (NOTE(review): presumably because the segment override then applies
     to the source operand of movs — confirm against the insn patterns).  */
  return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
	  && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
	      || Pmode == word_mode));
}
5282
/* Return false if INSN contains a MEM with a non-default address space.  */
bool
ix86_check_no_addr_space (rtx insn)
{
  /* Walk every sub-rtx of the insn pattern, looking for a MEM whose
     address lives in a non-generic address space.  */
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
	return false;
    }
  return true;
}
5296
5297/* Initialize the table of extra 80387 mathematical constants. */
5298
5299static void
5300init_ext_80387_constants (void)
5301{
5302 static const char * cst[5] =
5303 {
5304 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5305 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5306 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5307 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5308 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5309 };
5310 int i;
5311
5312 for (i = 0; i < 5; i++)
5313 {
5314 real_from_string (&ext_80387_constants_table[i], cst[i]);
5315 /* Ensure each constant is rounded to XFmode precision. */
5316 real_convert (&ext_80387_constants_table[i],
5317 XFmode, &ext_80387_constants_table[i]);
5318 }
5319
5320 ext_80387_constants_init = 1;
5321}
5322
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  machine_mode mode = GET_MODE (x);

  const REAL_VALUE_TYPE *r;

  /* Only x87 floating-point CONST_DOUBLEs qualify; -1 means
     "not even a candidate".  */
  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    return -1;

  /* 1 and 2 select fldz and fld1 respectively (see
     standard_80387_constant_opcode).  */
  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  r = CONST_DOUBLE_REAL_VALUE (x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
      && !flag_rounding_math)
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      /* Values 3..7 select fldlg2, fldln2, fldl2e, fldl2t and fldpi,
	 matching the table built by init_ext_80387_constants.  */
      for (i = 0; i < 5; i++)
	if (real_identical (r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (r))
    return 8;
  if (real_identical (r, &dconstm1))
    return 9;

  return 0;
}
5368
5369/* Return the opcode of the special instruction to be used to load
5370 the constant X. */
5371
5372const char *
5373standard_80387_constant_opcode (rtx x)
5374{
5375 switch (standard_80387_constant_p (x))
5376 {
5377 case 1:
5378 return "fldz";
5379 case 2:
5380 return "fld1";
5381 case 3:
5382 return "fldlg2";
5383 case 4:
5384 return "fldln2";
5385 case 5:
5386 return "fldl2e";
5387 case 6:
5388 return "fldl2t";
5389 case 7:
5390 return "fldpi";
5391 case 8:
5392 case 9:
5393 return "#";
5394 default:
5395 gcc_unreachable ();
5396 }
5397}
5398
5399/* Return the CONST_DOUBLE representing the 80387 constant that is
5400 loaded by the specified special instruction. The argument IDX
5401 matches the return value from standard_80387_constant_p. */
5402
5403rtx
5404standard_80387_constant_rtx (int idx)
5405{
5406 int i;
5407
5408 if (! ext_80387_constants_init)
5409 init_ext_80387_constants ();
5410
5411 switch (idx)
5412 {
5413 case 3:
5414 case 4:
5415 case 5:
5416 case 6:
5417 case 7:
5418 i = idx - 3;
5419 break;
5420
5421 default:
5422 gcc_unreachable ();
5423 }
5424
5425 return const_double_from_real_value (ext_80387_constants_table[i],
5426 XFmode);
5427}
5428
/* Return 1 if X is all bits 0, 2 if X is all bits 1
   and 3 if X is all bits 1 with zero extend
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  /* All-zeros can be materialized by any SSE level.  */
  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  if (x == constm1_rtx
      || vector_all_ones_operand (x, mode)
      || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	   || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
	  && float_vector_all_ones_operand (x, mode)))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
	mode = pred_mode;

      /* All-ones needs an instruction of matching vector width, so
	 gate on the ISA level for each size.  */
      switch (GET_MODE_SIZE (mode))
	{
	case 64:
	  if (TARGET_AVX512F)
	    return 2;
	  break;
	case 32:
	  if (TARGET_AVX2)
	    return 2;
	  break;
	case 16:
	  if (TARGET_SSE2)
	    return 2;
	  break;
	case 0:
	  /* VOIDmode */
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  /* All-ones in the low half/quarter, zeros above.  */
  if (vector_all_ones_zero_extend_half_operand (x, mode)
      || vector_all_ones_zero_extend_quarter_operand (x, mode))
    return 3;

  return 0;
}
5484
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  /* All-zeros: emit a self-XOR of the destination, picking the
     encoding from the insn's mode attribute.  */
  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0";
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  /* xmm16+/ymm16+ need the EVEX-encodable vpxord; without
	     AVX512VL the XOR must operate on the full zmm (%g0).  */
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpxord\t%x0, %x0, %x0";
	      else
		return "vpxord\t%g0, %g0, %g0";
	    }
	  return "vpxor\t%x0, %x0, %x0";

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  /* EVEX vxorpd requires AVX512DQ; otherwise fall back to the
	     integer vpxorq.  */
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorpd\t%x0, %x0, %x0";
		  else
		    return "vxorpd\t%g0, %g0, %g0";
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxorq\t%x0, %x0, %x0";
		  else
		    return "vpxorq\t%g0, %g0, %g0";
		}
	    }
	  return "vxorpd\t%x0, %x0, %x0";

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  /* Same AVX512DQ consideration for vxorps vs. vpxord.  */
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorps\t%x0, %x0, %x0";
		  else
		    return "vxorps\t%g0, %g0, %g0";
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxord\t%x0, %x0, %x0";
		  else
		    return "vpxord\t%g0, %g0, %g0";
		}
	    }
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  gcc_unreachable ();
	}
    }
  /* All-ones: a self pcmpeqd, or for EVEX registers a vpternlogd
     with an all-ones truth-table immediate.  */
  else if (x == constm1_rtx
	   || vector_all_ones_operand (x, mode)
	   || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	       && float_vector_all_ones_operand (x, mode)))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F);
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
	      else
		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
	    }
	  return (TARGET_AVX
		  ? "vpcmpeqd\t%0, %0, %0"
		  : "pcmpeqd\t%0, %0");

	default:
	  gcc_unreachable ();
	}
    }
  /* Low half all-ones, upper half zeros: compare-equal on the
     half-width register (presumably relying on VEX/EVEX zeroing of
     the upper bits -- NOTE(review): confirm).  */
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      if (GET_MODE_SIZE (mode) == 64)
	{
	  gcc_assert (TARGET_AVX512F);
	  return "vpcmpeqd\t%t0, %t0, %t0";
	}
      else if (GET_MODE_SIZE (mode) == 32)
	{
	  gcc_assert (TARGET_AVX);
	  return "vpcmpeqd\t%x0, %x0, %x0";
	}
      gcc_unreachable ();
    }
  /* Low quarter all-ones, rest zeros.  */
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      gcc_assert (TARGET_AVX512F);
      return "vpcmpeqd\t%x0, %x0, %x0";
    }

  gcc_unreachable ();
}
5632
5633/* Returns true if INSN can be transformed from a memory load
5634 to a supported FP constant load. */
5635
5636bool
5637ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5638{
5639 rtx src = find_constant_src (insn);
5640
5641 gcc_assert (REG_P (dst));
5642
5643 if (src == NULL
5644 || (SSE_REGNO_P (REGNO (dst))
5645 && standard_sse_constant_p (x: src, GET_MODE (dst)) != 1)
5646 || (!TARGET_AVX512VL
5647 && EXT_REX_SSE_REGNO_P (REGNO (dst))
5648 && standard_sse_constant_p (x: src, GET_MODE (dst)) == 1)
5649 || (STACK_REGNO_P (REGNO (dst))
5650 && standard_80387_constant_p (x: src) < 1))
5651 return false;
5652
5653 return true;
5654}
5655
5656/* Predicate for pre-reload splitters with associated instructions,
5657 which can match any time before the split1 pass (usually combine),
5658 then are unconditionally split in that pass and should not be
5659 matched again afterwards. */
5660
5661bool
5662ix86_pre_reload_split (void)
5663{
5664 return (can_create_pseudo_p ()
5665 && !(cfun->curr_properties & PROP_rtl_split_insns));
5666}
5667
5668/* Return the opcode of the TYPE_SSEMOV instruction. To move from
5669 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5670 TARGET_AVX512VL or it is a register to register move which can
5671 be done with zmm register move. */
5672
5673static const char *
5674ix86_get_ssemov (rtx *operands, unsigned size,
5675 enum attr_mode insn_mode, machine_mode mode)
5676{
5677 char buf[128];
5678 bool misaligned_p = (misaligned_operand (operands[0], mode)
5679 || misaligned_operand (operands[1], mode));
5680 bool evex_reg_p = (size == 64
5681 || EXT_REX_SSE_REG_P (operands[0])
5682 || EXT_REX_SSE_REG_P (operands[1]));
5683
5684 bool egpr_p = (TARGET_APX_EGPR
5685 && (x86_extended_rex2reg_mentioned_p (operands[0])
5686 || x86_extended_rex2reg_mentioned_p (operands[1])));
5687 bool egpr_vl = egpr_p && TARGET_AVX512VL;
5688
5689 machine_mode scalar_mode;
5690
5691 const char *opcode = NULL;
5692 enum
5693 {
5694 opcode_int,
5695 opcode_float,
5696 opcode_double
5697 } type = opcode_int;
5698
5699 switch (insn_mode)
5700 {
5701 case MODE_V16SF:
5702 case MODE_V8SF:
5703 case MODE_V4SF:
5704 scalar_mode = E_SFmode;
5705 type = opcode_float;
5706 break;
5707 case MODE_V8DF:
5708 case MODE_V4DF:
5709 case MODE_V2DF:
5710 scalar_mode = E_DFmode;
5711 type = opcode_double;
5712 break;
5713 case MODE_XI:
5714 case MODE_OI:
5715 case MODE_TI:
5716 scalar_mode = GET_MODE_INNER (mode);
5717 break;
5718 default:
5719 gcc_unreachable ();
5720 }
5721
5722 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5723 we can only use zmm register move without memory operand. */
5724 if (evex_reg_p
5725 && !TARGET_AVX512VL
5726 && GET_MODE_SIZE (mode) < 64)
5727 {
5728 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5729 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5730 AVX512VL is disabled, LRA can still generate reg to
5731 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5732 modes. */
5733 if (memory_operand (operands[0], mode)
5734 || memory_operand (operands[1], mode))
5735 gcc_unreachable ();
5736 size = 64;
5737 switch (type)
5738 {
5739 case opcode_int:
5740 if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5741 opcode = (misaligned_p
5742 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5743 : "vmovdqa64");
5744 else
5745 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5746 break;
5747 case opcode_float:
5748 opcode = misaligned_p ? "vmovups" : "vmovaps";
5749 break;
5750 case opcode_double:
5751 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5752 break;
5753 }
5754 }
5755 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5756 {
5757 switch (scalar_mode)
5758 {
5759 case E_HFmode:
5760 case E_BFmode:
5761 if (evex_reg_p || egpr_vl)
5762 opcode = (misaligned_p
5763 ? (TARGET_AVX512BW
5764 ? "vmovdqu16"
5765 : "vmovdqu64")
5766 : "vmovdqa64");
5767 else if (egpr_p)
5768 opcode = (misaligned_p
5769 ? (TARGET_AVX512BW
5770 ? "vmovdqu16"
5771 : "%vmovups")
5772 : "%vmovaps");
5773 else
5774 opcode = (misaligned_p
5775 ? (TARGET_AVX512BW && evex_reg_p
5776 ? "vmovdqu16"
5777 : "%vmovdqu")
5778 : "%vmovdqa");
5779 break;
5780 case E_SFmode:
5781 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5782 break;
5783 case E_DFmode:
5784 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5785 break;
5786 case E_TFmode:
5787 if (evex_reg_p || egpr_vl)
5788 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5789 else if (egpr_p)
5790 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5791 else
5792 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5793 break;
5794 default:
5795 gcc_unreachable ();
5796 }
5797 }
5798 else if (SCALAR_INT_MODE_P (scalar_mode))
5799 {
5800 switch (scalar_mode)
5801 {
5802 case E_QImode:
5803 if (evex_reg_p || egpr_vl)
5804 opcode = (misaligned_p
5805 ? (TARGET_AVX512BW
5806 ? "vmovdqu8"
5807 : "vmovdqu64")
5808 : "vmovdqa64");
5809 else if (egpr_p)
5810 opcode = (misaligned_p
5811 ? (TARGET_AVX512BW
5812 ? "vmovdqu8"
5813 : "%vmovups")
5814 : "%vmovaps");
5815 else
5816 opcode = (misaligned_p
5817 ? (TARGET_AVX512BW && evex_reg_p
5818 ? "vmovdqu8"
5819 : "%vmovdqu")
5820 : "%vmovdqa");
5821 break;
5822 case E_HImode:
5823 if (evex_reg_p || egpr_vl)
5824 opcode = (misaligned_p
5825 ? (TARGET_AVX512BW
5826 ? "vmovdqu16"
5827 : "vmovdqu64")
5828 : "vmovdqa64");
5829 else if (egpr_p)
5830 opcode = (misaligned_p
5831 ? (TARGET_AVX512BW
5832 ? "vmovdqu16"
5833 : "%vmovups")
5834 : "%vmovaps");
5835 else
5836 opcode = (misaligned_p
5837 ? (TARGET_AVX512BW && evex_reg_p
5838 ? "vmovdqu16"
5839 : "%vmovdqu")
5840 : "%vmovdqa");
5841 break;
5842 case E_SImode:
5843 if (evex_reg_p || egpr_vl)
5844 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5845 else if (egpr_p)
5846 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5847 else
5848 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5849 break;
5850 case E_DImode:
5851 case E_TImode:
5852 case E_OImode:
5853 if (evex_reg_p || egpr_vl)
5854 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5855 else if (egpr_p)
5856 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5857 else
5858 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5859 break;
5860 case E_XImode:
5861 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5862 break;
5863 default:
5864 gcc_unreachable ();
5865 }
5866 }
5867 else
5868 gcc_unreachable ();
5869
5870 switch (size)
5871 {
5872 case 64:
5873 snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%g1, %%g0|%%g0, %%g1}",
5874 opcode);
5875 break;
5876 case 32:
5877 snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%t1, %%t0|%%t0, %%t1}",
5878 opcode);
5879 break;
5880 case 16:
5881 snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%x1, %%x0|%%x0, %%x1}",
5882 opcode);
5883 break;
5884 default:
5885 gcc_unreachable ();
5886 }
5887 output_asm_insn (buf, operands);
5888 return "";
5889}
5890
5891/* Return the template of the TYPE_SSEMOV instruction to move
5892 operands[1] into operands[0]. */
5893
5894const char *
5895ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5896{
5897 machine_mode mode = GET_MODE (operands[0]);
5898 if (get_attr_type (insn) != TYPE_SSEMOV
5899 || mode != GET_MODE (operands[1]))
5900 gcc_unreachable ();
5901
5902 enum attr_mode insn_mode = get_attr_mode (insn);
5903
5904 switch (insn_mode)
5905 {
5906 case MODE_XI:
5907 case MODE_V8DF:
5908 case MODE_V16SF:
5909 return ix86_get_ssemov (operands, size: 64, insn_mode, mode);
5910
5911 case MODE_OI:
5912 case MODE_V4DF:
5913 case MODE_V8SF:
5914 return ix86_get_ssemov (operands, size: 32, insn_mode, mode);
5915
5916 case MODE_TI:
5917 case MODE_V2DF:
5918 case MODE_V4SF:
5919 return ix86_get_ssemov (operands, size: 16, insn_mode, mode);
5920
5921 case MODE_DI:
5922 /* Handle broken assemblers that require movd instead of movq. */
5923 if (GENERAL_REG_P (operands[0]))
5924 {
5925 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5926 return "%vmovq\t{%1, %q0|%q0, %1}";
5927 else
5928 return "%vmovd\t{%1, %q0|%q0, %1}";
5929 }
5930 else if (GENERAL_REG_P (operands[1]))
5931 {
5932 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5933 return "%vmovq\t{%q1, %0|%0, %q1}";
5934 else
5935 return "%vmovd\t{%q1, %0|%0, %q1}";
5936 }
5937 else
5938 return "%vmovq\t{%1, %0|%0, %1}";
5939
5940 case MODE_SI:
5941 if (GENERAL_REG_P (operands[0]))
5942 return "%vmovd\t{%1, %k0|%k0, %1}";
5943 else if (GENERAL_REG_P (operands[1]))
5944 return "%vmovd\t{%k1, %0|%0, %k1}";
5945 else
5946 return "%vmovd\t{%1, %0|%0, %1}";
5947
5948 case MODE_HI:
5949 if (GENERAL_REG_P (operands[0]))
5950 return "vmovw\t{%1, %k0|%k0, %1}";
5951 else if (GENERAL_REG_P (operands[1]))
5952 return "vmovw\t{%k1, %0|%0, %k1}";
5953 else
5954 return "vmovw\t{%1, %0|%0, %1}";
5955
5956 case MODE_DF:
5957 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5958 return "vmovsd\t{%d1, %0|%0, %d1}";
5959 else
5960 return "%vmovsd\t{%1, %0|%0, %1}";
5961
5962 case MODE_SF:
5963 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5964 return "vmovss\t{%d1, %0|%0, %d1}";
5965 else
5966 return "%vmovss\t{%1, %0|%0, %1}";
5967
5968 case MODE_HF:
5969 case MODE_BF:
5970 if (REG_P (operands[0]) && REG_P (operands[1]))
5971 return "vmovsh\t{%d1, %0|%0, %d1}";
5972 else
5973 return "vmovsh\t{%1, %0|%0, %1}";
5974
5975 case MODE_V1DF:
5976 gcc_assert (!TARGET_AVX);
5977 return "movlpd\t{%1, %0|%0, %1}";
5978
5979 case MODE_V2SF:
5980 if (TARGET_AVX && REG_P (operands[0]))
5981 return "vmovlps\t{%1, %d0|%d0, %1}";
5982 else
5983 return "%vmovlps\t{%1, %0|%0, %1}";
5984
5985 default:
5986 gcc_unreachable ();
5987 }
5988}
5989
5990/* Returns true if OP contains a symbol reference */
5991
5992bool
5993symbolic_reference_mentioned_p (rtx op)
5994{
5995 const char *fmt;
5996 int i;
5997
5998 if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
5999 return true;
6000
6001 fmt = GET_RTX_FORMAT (GET_CODE (op));
6002 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6003 {
6004 if (fmt[i] == 'E')
6005 {
6006 int j;
6007
6008 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6009 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6010 return true;
6011 }
6012
6013 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6014 return true;
6015 }
6016
6017 return false;
6018}
6019
6020/* Return true if it is appropriate to emit `ret' instructions in the
6021 body of a function. Do this only if the epilogue is simple, needing a
6022 couple of insns. Prior to reloading, we can't tell how many registers
6023 must be saved, so return false then. Return false if there is no frame
6024 marker to de-allocate. */
6025
6026bool
6027ix86_can_use_return_insn_p (void)
6028{
6029 if (ix86_function_ms_hook_prologue (fn: current_function_decl))
6030 return false;
6031
6032 if (ix86_function_naked (fn: current_function_decl))
6033 return false;
6034
6035 /* Don't use `ret' instruction in interrupt handler. */
6036 if (! reload_completed
6037 || frame_pointer_needed
6038 || cfun->machine->func_type != TYPE_NORMAL)
6039 return 0;
6040
6041 /* Don't allow more than 32k pop, since that's all we can do
6042 with one instruction. */
6043 if (crtl->args.pops_args && crtl->args.size >= 32768)
6044 return 0;
6045
6046 struct ix86_frame &frame = cfun->machine->frame;
6047 return (frame.stack_pointer_offset == UNITS_PER_WORD
6048 && (frame.nregs + frame.nsseregs) == 0);
6049}
6050
6051/* Return stack frame size. get_frame_size () returns used stack slots
6052 during compilation, which may be optimized out later. If stack frame
6053 is needed, stack_frame_required should be true. */
6054
6055static HOST_WIDE_INT
6056ix86_get_frame_size (void)
6057{
6058 if (cfun->machine->stack_frame_required)
6059 return get_frame_size ();
6060 else
6061 return 0;
6062}
6063
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.
     (Set by ix86_setup_frame_addresses.)  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* SSE saves require frame-pointer when stack is misaligned
     (incoming boundary below 128 bits).  */
  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  /* Several versions of mcount for the x86 assumes that there is a
     frame, so we cannot allow profiling without a frame pointer.  */
  if (crtl->profile && !flag_fentry)
    return true;

  /* Otherwise the frame pointer may be omitted.  */
  return false;
}
6109
/* Record that the current function accesses previous call frames.
   This forces a frame pointer (see ix86_frame_pointer_required).  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
6117
/* Nonzero if thunk functions may be emitted as hidden, one-only
   (comdat) definitions; needs both gas .hidden support and one-only
   section support in the target.  */
#if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Label count for call and return thunks.  It is used to make unique
   labels in call and return thunks.  */
static int indirectlabelno;

/* True if call thunk function is needed.  */
static bool indirect_thunk_needed = false;

/* Bit masks of integer registers, which contain branch target, used
   by call thunk functions.  */
static HARD_REG_SET indirect_thunks_used;

/* True if return thunk function is needed.  */
static bool indirect_return_needed = false;

/* True if return thunk function via CX is needed.  */
static bool indirect_return_via_cx;

/* Prefix for the internal labels emitted inside indirect thunks.  */
#ifndef INDIRECT_LABEL
# define INDIRECT_LABEL "LIND"
#endif

/* Indicate what prefix is needed for an indirect branch.  */
enum indirect_thunk_prefix
{
  indirect_thunk_prefix_none,
  indirect_thunk_prefix_nt
};
6151
6152/* Return the prefix needed for an indirect branch INSN. */
6153
6154enum indirect_thunk_prefix
6155indirect_thunk_need_prefix (rtx_insn *insn)
6156{
6157 enum indirect_thunk_prefix need_prefix;
6158 if ((cfun->machine->indirect_branch_type
6159 == indirect_branch_thunk_extern)
6160 && ix86_notrack_prefixed_insn_p (insn))
6161 {
6162 /* NOTRACK prefix is only used with external thunk so that it
6163 can be properly updated to support CET at run-time. */
6164 need_prefix = indirect_thunk_prefix_nt;
6165 }
6166 else
6167 need_prefix = indirect_thunk_prefix_none;
6168 return need_prefix;
6169}
6170
6171/* Fills in the label name that should be used for the indirect thunk. */
6172
6173static void
6174indirect_thunk_name (char name[32], unsigned int regno,
6175 enum indirect_thunk_prefix need_prefix,
6176 bool ret_p)
6177{
6178 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
6179 gcc_unreachable ();
6180
6181 if (USE_HIDDEN_LINKONCE)
6182 {
6183 const char *prefix;
6184
6185 if (need_prefix == indirect_thunk_prefix_nt
6186 && regno != INVALID_REGNUM)
6187 {
6188 /* NOTRACK prefix is only used with external thunk via
6189 register so that NOTRACK prefix can be added to indirect
6190 branch via register to support CET at run-time. */
6191 prefix = "_nt";
6192 }
6193 else
6194 prefix = "";
6195
6196 const char *ret = ret_p ? "return" : "indirect";
6197
6198 if (regno != INVALID_REGNUM)
6199 {
6200 const char *reg_prefix;
6201 if (LEGACY_INT_REGNO_P (regno))
6202 reg_prefix = TARGET_64BIT ? "r" : "e";
6203 else
6204 reg_prefix = "";
6205 sprintf (s: name, format: "__x86_%s_thunk%s_%s%s",
6206 ret, prefix, reg_prefix, reg_names[regno]);
6207 }
6208 else
6209 sprintf (s: name, format: "__x86_%s_thunk%s", ret, prefix);
6210 }
6211 else
6212 {
6213 if (regno != INVALID_REGNUM)
6214 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
6215 else
6216 {
6217 if (ret_p)
6218 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
6219 else
6220 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
6221 }
6222 }
6223}
6224
6225/* Output a call and return thunk for indirect branch. If REGNO != -1,
6226 the function address is in REGNO and the call and return thunk looks like:
6227
6228 call L2
6229 L1:
6230 pause
6231 lfence
6232 jmp L1
6233 L2:
6234 mov %REG, (%sp)
6235 ret
6236
6237 Otherwise, the function address is on the top of stack and the
6238 call and return thunk looks like:
6239
6240 call L2
6241 L1:
6242 pause
6243 lfence
6244 jmp L1
6245 L2:
6246 lea WORD_SIZE(%sp), %sp
6247 ret
6248 */
6249
6250static void
6251output_indirect_thunk (unsigned int regno)
6252{
6253 char indirectlabel1[32];
6254 char indirectlabel2[32];
6255
6256 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6257 indirectlabelno++);
6258 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6259 indirectlabelno++);
6260
6261 /* Call */
6262 fputs (s: "\tcall\t", stream: asm_out_file);
6263 assemble_name_raw (asm_out_file, indirectlabel2);
6264 fputc (c: '\n', stream: asm_out_file);
6265
6266 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
6267
6268 /* AMD and Intel CPUs prefer each a different instruction as loop filler.
6269 Usage of both pause + lfence is compromise solution. */
6270 fprintf (stream: asm_out_file, format: "\tpause\n\tlfence\n");
6271
6272 /* Jump. */
6273 fputs (s: "\tjmp\t", stream: asm_out_file);
6274 assemble_name_raw (asm_out_file, indirectlabel1);
6275 fputc (c: '\n', stream: asm_out_file);
6276
6277 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6278
6279 /* The above call insn pushed a word to stack. Adjust CFI info. */
6280 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6281 {
6282 if (! dwarf2out_do_cfi_asm ())
6283 {
6284 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6285 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6286 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
6287 vec_safe_push (cfun->fde->dw_fde_cfi, obj: xcfi);
6288 }
6289 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6290 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6291 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
6292 vec_safe_push (cfun->fde->dw_fde_cfi, obj: xcfi);
6293 dwarf2out_emit_cfi (cfi: xcfi);
6294 }
6295
6296 if (regno != INVALID_REGNUM)
6297 {
6298 /* MOV. */
6299 rtx xops[2];
6300 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6301 xops[1] = gen_rtx_REG (word_mode, regno);
6302 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6303 }
6304 else
6305 {
6306 /* LEA. */
6307 rtx xops[2];
6308 xops[0] = stack_pointer_rtx;
6309 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6310 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6311 }
6312
6313 fputs (s: "\tret\n", stream: asm_out_file);
6314 if ((ix86_harden_sls & harden_sls_return))
6315 fputs (s: "\tint3\n", stream: asm_out_file);
6316}
6317
/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  Thunk is
   used for function return if RET_P is true.  NEED_PREFIX selects
   the naming prefix used by indirect_thunk_name.  */

static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
				unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  /* Build a minimal void(void) FUNCTION_DECL so the thunk can be
     emitted through the regular assembler-output machinery.  */
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      /* Darwin: emit the thunk as a weak, private-extern symbol in the
	 picbase thunk section so duplicates coalesce at link time.  */
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
	/* Put the thunk in its own COMDAT group so that copies emitted
	   in multiple translation units are merged by the linker.  */
	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	targetm.asm_out.unique_section (decl, 0);
	switch_to_section (get_named_section (decl, NULL, 0));

	targetm.asm_out.globalize_label (asm_out_file, name);
	fputs (s: "\t.hidden\t", stream: asm_out_file);
	assemble_name (asm_out_file, name);
	putc (c: '\n', stream: asm_out_file);
	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
	/* No linkonce support: just emit the label in .text.  */
	switch_to_section (text_section);
	ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  /* Set up a temporary function context so final_* can run.  */
  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  /* Tear down the temporary function context created above.  */
  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
6393
6394static int pic_labels_used;
6395
6396/* Fills in the label name that should be used for a pc thunk for
6397 the given register. */
6398
6399static void
6400get_pc_thunk_name (char name[32], unsigned int regno)
6401{
6402 gcc_assert (!TARGET_64BIT);
6403
6404 if (USE_HIDDEN_LINKONCE)
6405 sprintf (s: name, format: "__x86.get_pc_thunk.%s", reg_names[regno]);
6406 else
6407 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6408}
6409

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Also emits any
   indirect-branch/return thunks requested during code generation.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  /* Return thunks requested for this translation unit.  */
  if (indirect_return_needed)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    INVALID_REGNUM, ret_p: true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    CX_REG, ret_p: true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    INVALID_REGNUM, ret_p: false);

  /* Per-register indirect thunks for the REX registers.  */
  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);
    }

  /* Per-register indirect thunks for the APX REX2 registers.  */
  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);
    }

  /* Legacy integer registers: besides indirect thunks, these may also
     need the 32-bit PIC pc-thunks (__x86.get_pc_thunk.*) recorded in
     pic_labels_used by output_set_got.  */
  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);

      /* Skip registers for which no pc-thunk was requested.  */
      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      /* Build a minimal void(void) FUNCTION_DECL for the thunk.  */
      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  /* Darwin: weak, private-extern thunk in the picbase section.  */
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
	if (USE_HIDDEN_LINKONCE)
	  {
	    /* COMDAT so duplicate thunks merge at link time.  */
	    cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	    targetm.asm_out.unique_section (decl, 0);
	    switch_to_section (get_named_section (decl, NULL, 0));

	    targetm.asm_out.globalize_label (asm_out_file, name);
	    fputs (s: "\t.hidden\t", stream: asm_out_file);
	    assemble_name (asm_out_file, name);
	    putc (c: '\n', stream: asm_out_file);
	    ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	  }
	else
	  {
	    switch_to_section (text_section);
	    ASM_OUTPUT_LABEL (asm_out_file, name);
	  }

      /* Set up a temporary function context so final_* can run.  */
      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs (s: "\tnop\n", stream: asm_out_file);
	}

      /* The thunk body: load the return address from the top of the
	 stack into the target register, then return.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs (s: "\tret\n", stream: asm_out_file);
      /* Tear down the temporary function context.  */
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
6535
/* Emit code for the SET_GOT patterns.  DEST is the register that will
   hold the GOT/PIC base; LABEL, if non-null, marks the point of use.
   Returns the (empty) assembler template.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      /* Call the pc-thunk for DEST; the thunk body itself is emitted
	 later by ix86_code_end, so record that it is needed.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      /* Non-PIC: materialize the address of a local label directly.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  if (!TARGET_MACHO)
    /* Add the _GLOBAL_OFFSET_TABLE_ displacement to the loaded pc.  */
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
6605
6606/* Generate an "push" pattern for input ARG. */
6607
6608rtx
6609gen_push (rtx arg, bool ppx_p)
6610{
6611 struct machine_function *m = cfun->machine;
6612
6613 if (m->fs.cfa_reg == stack_pointer_rtx)
6614 m->fs.cfa_offset += UNITS_PER_WORD;
6615 m->fs.sp_offset += UNITS_PER_WORD;
6616
6617 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6618 arg = gen_rtx_REG (word_mode, REGNO (arg));
6619
6620 rtx stack = gen_rtx_MEM (word_mode,
6621 gen_rtx_PRE_DEC (Pmode,
6622 stack_pointer_rtx));
6623 return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6624}
6625
6626rtx
6627gen_pushfl (void)
6628{
6629 struct machine_function *m = cfun->machine;
6630 rtx flags, mem;
6631
6632 if (m->fs.cfa_reg == stack_pointer_rtx)
6633 m->fs.cfa_offset += UNITS_PER_WORD;
6634 m->fs.sp_offset += UNITS_PER_WORD;
6635
6636 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6637
6638 mem = gen_rtx_MEM (word_mode,
6639 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6640
6641 return gen_pushfl2 (arg0: word_mode, x0: mem, x1: flags);
6642}
6643
6644/* Generate an "pop" pattern for input ARG. */
6645
6646rtx
6647gen_pop (rtx arg, bool ppx_p)
6648{
6649 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6650 arg = gen_rtx_REG (word_mode, REGNO (arg));
6651
6652 rtx stack = gen_rtx_MEM (word_mode,
6653 gen_rtx_POST_INC (Pmode,
6654 stack_pointer_rtx));
6655
6656 return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6657}
6658
6659rtx
6660gen_popfl (void)
6661{
6662 rtx flags, mem;
6663
6664 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6665
6666 mem = gen_rtx_MEM (word_mode,
6667 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6668
6669 return gen_popfl1 (arg0: word_mode, x0: flags, x1: mem);
6670}
6671
6672/* Generate a "push2" pattern for input ARG. */
6673rtx
6674gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6675{
6676 struct machine_function *m = cfun->machine;
6677 const int offset = UNITS_PER_WORD * 2;
6678
6679 if (m->fs.cfa_reg == stack_pointer_rtx)
6680 m->fs.cfa_offset += offset;
6681 m->fs.sp_offset += offset;
6682
6683 if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6684 reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6685
6686 if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6687 reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6688
6689 return ppx_p ? gen_push2p_di (mem, reg1, reg2)
6690 : gen_push2_di (mem, reg1, reg2);
6691}
6692
6693/* Return >= 0 if there is an unused call-clobbered register available
6694 for the entire function. */
6695
6696static unsigned int
6697ix86_select_alt_pic_regnum (void)
6698{
6699 if (ix86_use_pseudo_pic_reg ())
6700 return INVALID_REGNUM;
6701
6702 if (crtl->is_leaf
6703 && !crtl->profile
6704 && !ix86_current_function_calls_tls_descriptor)
6705 {
6706 int i, drap;
6707 /* Can't use the same register for both PIC and DRAP. */
6708 if (crtl->drap_reg)
6709 drap = REGNO (crtl->drap_reg);
6710 else
6711 drap = -1;
6712 for (i = 2; i >= 0; --i)
6713 if (i != drap && !df_regs_ever_live_p (i))
6714 return i;
6715 }
6716
6717 return INVALID_REGNUM;
6718}
6719
6720/* Return true if REGNO is used by the epilogue. */
6721
6722bool
6723ix86_epilogue_uses (int regno)
6724{
6725 /* If there are no caller-saved registers, we preserve all registers,
6726 except for MMX and x87 registers which aren't supported when saving
6727 and restoring registers. Don't explicitly save SP register since
6728 it is always preserved. */
6729 return (epilogue_completed
6730 && (cfun->machine->call_saved_registers
6731 == TYPE_NO_CALLER_SAVED_REGISTERS)
6732 && !fixed_regs[regno]
6733 && !STACK_REGNO_P (regno)
6734 && !MMX_REGNO_P (regno));
6735}
6736
6737/* Return nonzero if register REGNO can be used as a scratch register
6738 in peephole2. */
6739
6740static bool
6741ix86_hard_regno_scratch_ok (unsigned int regno)
6742{
6743 /* If there are no caller-saved registers, we can't use any register
6744 as a scratch register after epilogue and use REGNO as scratch
6745 register only if it has been used before to avoid saving and
6746 restoring it. */
6747 return ((cfun->machine->call_saved_registers
6748 != TYPE_NO_CALLER_SAVED_REGISTERS)
6749 || (!epilogue_completed
6750 && df_regs_ever_live_p (regno)));
6751}
6752
/* Return TRUE if we need to save REGNO.  MAYBE_EH_RETURN means the EH
   return data registers should be treated as live.  IGNORE_OUTLINED
   means registers managed by an ms2sysv out-of-line stub should not be
   reported as needing an inline save.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  rtx reg;

  switch (cfun->machine->call_saved_registers)
    {
    case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
      break;

    case TYPE_NO_CALLER_SAVED_REGISTERS:
      /* If there are no caller-saved registers, we preserve all
	 registers, except for MMX and x87 registers which aren't
	 supported when saving and restoring registers.  Don't
	 explicitly save SP register since it is always preserved.

	 Don't preserve registers used for function return value.  */
      reg = crtl->return_rtx;
      if (reg)
	{
	  /* Walk every hard register making up the return value; each
	     one is exempt from saving.  */
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));

    case TYPE_NO_CALLEE_SAVED_REGISTERS:
    case TYPE_PRESERVE_NONE:
      /* Nothing is callee-saved except a frame pointer when needed.  */
      if (regno != HARD_FRAME_POINTER_REGNUM)
	return false;
      break;
    }

  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	     _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	/* Only save the PIC register when no alternate call-clobbered
	   register is free to hold the PIC base instead.  */
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      /* The EH return data registers must be preserved so the unwinder
	 can write the return values into them.  */
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      /* Registers saved/restored by the ms2sysv stubs don't need an
	 inline save here.  */
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  /* The DRAP register must survive the prologue/epilogue unless its
     save/restore has been explicitly suppressed.  */
  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_or_fixed_reg_p (regno)
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
6844
6845/* Return number of saved general prupose registers. */
6846
6847static int
6848ix86_nsaved_regs (void)
6849{
6850 int nregs = 0;
6851 int regno;
6852
6853 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6854 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
6855 nregs ++;
6856 return nregs;
6857}
6858
6859/* Return number of saved SSE registers. */
6860
6861static int
6862ix86_nsaved_sseregs (void)
6863{
6864 int nregs = 0;
6865 int regno;
6866
6867 if (!TARGET_64BIT_MS_ABI
6868 && (cfun->machine->call_saved_registers
6869 != TYPE_NO_CALLER_SAVED_REGISTERS))
6870 return 0;
6871 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6872 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
6873 nregs ++;
6874 return nregs;
6875}
6876
6877/* Given FROM and TO register numbers, say whether this elimination is
6878 allowed. If stack alignment is needed, we can only replace argument
6879 pointer with hard frame pointer, or replace frame pointer with stack
6880 pointer. Otherwise, frame pointer elimination is automatically
6881 handled and all other eliminations are valid. */
6882
6883static bool
6884ix86_can_eliminate (const int from, const int to)
6885{
6886 if (stack_realign_fp)
6887 return ((from == ARG_POINTER_REGNUM
6888 && to == HARD_FRAME_POINTER_REGNUM)
6889 || (from == FRAME_POINTER_REGNUM
6890 && to == STACK_POINTER_REGNUM));
6891 else
6892 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6893}
6894
6895/* Return the offset between two registers, one to be eliminated, and the other
6896 its replacement, at the start of a routine. */
6897
6898HOST_WIDE_INT
6899ix86_initial_elimination_offset (int from, int to)
6900{
6901 struct ix86_frame &frame = cfun->machine->frame;
6902
6903 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6904 return frame.hard_frame_pointer_offset;
6905 else if (from == FRAME_POINTER_REGNUM
6906 && to == HARD_FRAME_POINTER_REGNUM)
6907 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6908 else
6909 {
6910 gcc_assert (to == STACK_POINTER_REGNUM);
6911
6912 if (from == ARG_POINTER_REGNUM)
6913 return frame.stack_pointer_offset;
6914
6915 gcc_assert (from == FRAME_POINTER_REGNUM);
6916 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6917 }
6918}
6919
6920/* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6921void
6922warn_once_call_ms2sysv_xlogues (const char *feature)
6923{
6924 static bool warned_once = false;
6925 if (!warned_once)
6926 {
6927 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6928 feature);
6929 warned_once = true;
6930 }
6931}
6932
6933/* Return the probing interval for -fstack-clash-protection. */
6934
6935static HOST_WIDE_INT
6936get_probe_interval (void)
6937{
6938 if (flag_stack_clash_protection)
6939 return (HOST_WIDE_INT_1U
6940 << param_stack_clash_protection_probe_interval);
6941 else
6942 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6943}
6944
6945/* When using -fsplit-stack, the allocation routines set a field in
6946 the TCB to the bottom of the stack plus this much space, measured
6947 in bytes. */
6948
6949#define SPLIT_STACK_AVAILABLE 256
6950
6951/* Return true if push2/pop2 can be generated. */
6952
6953static bool
6954ix86_can_use_push2pop2 (void)
6955{
6956 /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
6957 unsigned int incoming_stack_boundary
6958 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
6959 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
6960 return incoming_stack_boundary % 128 == 0;
6961}
6962
6963/* Helper function to determine whether push2/pop2 can be used in prologue or
6964 epilogue for register save/restore. */
6965static bool
6966ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
6967{
6968 if (!ix86_can_use_push2pop2 ())
6969 return false;
6970 int aligned = cfun->machine->fs.sp_offset % 16 == 0;
6971 return TARGET_APX_PUSH2POP2
6972 && !cfun->machine->frame.save_regs_using_mov
6973 && cfun->machine->func_type == TYPE_NORMAL
6974 && (nregs + aligned) >= 3;
6975}
6976
6977/* Check if push/pop should be used to save/restore registers. */
6978static bool
6979save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
6980{
6981 return ((!to_allocate && cfun->machine->frame.nregs <= 1)
6982 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6983 /* If static stack checking is enabled and done with probes,
6984 the registers need to be saved before allocating the frame. */
6985 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6986 /* If stack clash probing needs a loop, then it needs a
6987 scratch register. But the returned register is only guaranteed
6988 to be safe to use after register saves are complete. So if
6989 stack clash protections are enabled and the allocated frame is
6990 larger than the probe interval, then use pushes to save
6991 callee saved registers. */
6992 || (flag_stack_clash_protection
6993 && !ix86_target_stack_probe ()
6994 && to_allocate > get_probe_interval ()));
6995}
6996
6997/* Fill structure ix86_frame about frame of currently computed function. */
6998
6999static void
7000ix86_compute_frame_layout (void)
7001{
7002 struct ix86_frame *frame = &cfun->machine->frame;
7003 struct machine_function *m = cfun->machine;
7004 unsigned HOST_WIDE_INT stack_alignment_needed;
7005 HOST_WIDE_INT offset;
7006 unsigned HOST_WIDE_INT preferred_alignment;
7007 HOST_WIDE_INT size = ix86_get_frame_size ();
7008 HOST_WIDE_INT to_allocate;
7009
7010 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
7011 * ms_abi functions that call a sysv function. We now need to prune away
7012 * cases where it should be disabled. */
7013 if (TARGET_64BIT && m->call_ms2sysv)
7014 {
7015 gcc_assert (TARGET_64BIT_MS_ABI);
7016 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
7017 gcc_assert (!TARGET_SEH);
7018 gcc_assert (TARGET_SSE);
7019 gcc_assert (!ix86_using_red_zone ());
7020
7021 if (crtl->calls_eh_return)
7022 {
7023 gcc_assert (!reload_completed);
7024 m->call_ms2sysv = false;
7025 warn_once_call_ms2sysv_xlogues (feature: "__builtin_eh_return");
7026 }
7027
7028 else if (ix86_static_chain_on_stack)
7029 {
7030 gcc_assert (!reload_completed);
7031 m->call_ms2sysv = false;
7032 warn_once_call_ms2sysv_xlogues (feature: "static call chains");
7033 }
7034
7035 /* Finally, compute which registers the stub will manage. */
7036 else
7037 {
7038 unsigned count = xlogue_layout::count_stub_managed_regs ();
7039 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
7040 m->call_ms2sysv_pad_in = 0;
7041 }
7042 }
7043
7044 frame->nregs = ix86_nsaved_regs ();
7045 frame->nsseregs = ix86_nsaved_sseregs ();
7046
7047 /* 64-bit MS ABI seem to require stack alignment to be always 16,
7048 except for function prologues, leaf functions and when the defult
7049 incoming stack boundary is overriden at command line or via
7050 force_align_arg_pointer attribute.
7051
7052 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
7053 at call sites, including profile function calls.
7054
7055 For APX push2/pop2, the stack also requires 128b alignment. */
7056 if ((ix86_pro_and_epilogue_can_use_push2pop2 (nregs: frame->nregs)
7057 && crtl->preferred_stack_boundary < 128)
7058 || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
7059 && crtl->preferred_stack_boundary < 128)
7060 && (!crtl->is_leaf || cfun->calls_alloca != 0
7061 || ix86_current_function_calls_tls_descriptor
7062 || (TARGET_MACHO && crtl->profile)
7063 || ix86_incoming_stack_boundary < 128)))
7064 {
7065 crtl->preferred_stack_boundary = 128;
7066 if (crtl->stack_alignment_needed < 128)
7067 crtl->stack_alignment_needed = 128;
7068 }
7069
7070 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7071 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7072
7073 gcc_assert (!size || stack_alignment_needed);
7074 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7075 gcc_assert (preferred_alignment <= stack_alignment_needed);
7076
7077 /* The only ABI saving SSE regs should be 64-bit ms_abi or with
7078 no_caller_saved_registers attribue. */
7079 gcc_assert (TARGET_64BIT
7080 || (cfun->machine->call_saved_registers
7081 == TYPE_NO_CALLER_SAVED_REGISTERS)
7082 || !frame->nsseregs);
7083 if (TARGET_64BIT && m->call_ms2sysv)
7084 {
7085 gcc_assert (stack_alignment_needed >= 16);
7086 gcc_assert ((cfun->machine->call_saved_registers
7087 == TYPE_NO_CALLER_SAVED_REGISTERS)
7088 || !frame->nsseregs);
7089 }
7090
7091 /* For SEH we have to limit the amount of code movement into the prologue.
7092 At present we do this via a BLOCKAGE, at which point there's very little
7093 scheduling that can be done, which means that there's very little point
7094 in doing anything except PUSHs. */
7095 if (TARGET_SEH)
7096 m->use_fast_prologue_epilogue = false;
7097 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
7098 {
7099 int count = frame->nregs;
7100 struct cgraph_node *node = cgraph_node::get (decl: current_function_decl);
7101
7102 /* The fast prologue uses move instead of push to save registers. This
7103 is significantly longer, but also executes faster as modern hardware
7104 can execute the moves in parallel, but can't do that for push/pop.
7105
7106 Be careful about choosing what prologue to emit: When function takes
7107 many instructions to execute we may use slow version as well as in
7108 case function is known to be outside hot spot (this is known with
7109 feedback only). Weight the size of function by number of registers
7110 to save as it is cheap to use one or two push instructions but very
7111 slow to use many of them.
7112
7113 Calling this hook multiple times with the same frame requirements
7114 must produce the same layout, since the RA might otherwise be
7115 unable to reach a fixed point or might fail its final sanity checks.
7116 This means that once we've assumed that a function does or doesn't
7117 have a particular size, we have to stick to that assumption
7118 regardless of how the function has changed since. */
7119 if (count)
7120 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7121 if (node->frequency < NODE_FREQUENCY_NORMAL
7122 || (flag_branch_probabilities
7123 && node->frequency < NODE_FREQUENCY_HOT))
7124 m->use_fast_prologue_epilogue = false;
7125 else
7126 {
7127 if (count != frame->expensive_count)
7128 {
7129 frame->expensive_count = count;
7130 frame->expensive_p = expensive_function_p (count);
7131 }
7132 m->use_fast_prologue_epilogue = !frame->expensive_p;
7133 }
7134 }
7135
7136 frame->save_regs_using_mov
7137 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
7138
7139 /* Skip return address and error code in exception handler. */
7140 offset = INCOMING_FRAME_SP_OFFSET;
7141
7142 /* Skip pushed static chain. */
7143 if (ix86_static_chain_on_stack)
7144 offset += UNITS_PER_WORD;
7145
7146 /* Skip saved base pointer. */
7147 if (frame_pointer_needed)
7148 offset += UNITS_PER_WORD;
7149 frame->hfp_save_offset = offset;
7150
7151 /* The traditional frame pointer location is at the top of the frame. */
7152 frame->hard_frame_pointer_offset = offset;
7153
7154 /* Register save area */
7155 offset += frame->nregs * UNITS_PER_WORD;
7156 frame->reg_save_offset = offset;
7157
7158 /* Calculate the size of the va-arg area (not including padding, if any). */
7159 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7160
7161 /* Also adjust stack_realign_offset for the largest alignment of
7162 stack slot actually used. */
7163 if (stack_realign_fp
7164 || (cfun->machine->max_used_stack_alignment != 0
7165 && (offset % cfun->machine->max_used_stack_alignment) != 0))
7166 {
7167 /* We may need a 16-byte aligned stack for the remainder of the
7168 register save area, but the stack frame for the local function
7169 may require a greater alignment if using AVX/2/512. In order
7170 to avoid wasting space, we first calculate the space needed for
7171 the rest of the register saves, add that to the stack pointer,
7172 and then realign the stack to the boundary of the start of the
7173 frame for the local function. */
7174 HOST_WIDE_INT space_needed = 0;
7175 HOST_WIDE_INT sse_reg_space_needed = 0;
7176
7177 if (TARGET_64BIT)
7178 {
7179 if (m->call_ms2sysv)
7180 {
7181 m->call_ms2sysv_pad_in = 0;
7182 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7183 }
7184
7185 else if (frame->nsseregs)
7186 /* The only ABI that has saved SSE registers (Win64) also has a
7187 16-byte aligned default stack. However, many programs violate
7188 the ABI, and Wine64 forces stack realignment to compensate. */
7189 space_needed = frame->nsseregs * 16;
7190
7191 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7192
7193 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
7194 rounding to be pedantic. */
7195 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7196 }
7197 else
7198 space_needed = frame->va_arg_size;
7199
7200 /* Record the allocation size required prior to the realignment AND. */
7201 frame->stack_realign_allocate = space_needed;
7202
7203 /* The re-aligned stack starts at frame->stack_realign_offset. Values
7204 before this point are not directly comparable with values below
7205 this point. Use sp_valid_at to determine if the stack pointer is
7206 valid for a given offset, fp_valid_at for the frame pointer, or
7207 choose_baseaddr to have a base register chosen for you.
7208
7209 Note that the result of (frame->stack_realign_offset
7210 & (stack_alignment_needed - 1)) may not equal zero. */
7211 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7212 frame->stack_realign_offset = offset - space_needed;
7213 frame->sse_reg_save_offset = frame->stack_realign_offset
7214 + sse_reg_space_needed;
7215 }
7216 else
7217 {
7218 frame->stack_realign_offset = offset;
7219
7220 if (TARGET_64BIT && m->call_ms2sysv)
7221 {
7222 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7223 offset += xlogue_layout::get_instance ().get_stack_space_used ();
7224 }
7225
7226 /* Align and set SSE register save area. */
7227 else if (frame->nsseregs)
7228 {
7229 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7230 required and the DRAP re-alignment boundary is at least 16 bytes,
7231 then we want the SSE register save area properly aligned. */
7232 if (ix86_incoming_stack_boundary >= 128
7233 || (stack_realign_drap && stack_alignment_needed >= 16))
7234 offset = ROUND_UP (offset, 16);
7235 offset += frame->nsseregs * 16;
7236 }
7237 frame->sse_reg_save_offset = offset;
7238 offset += frame->va_arg_size;
7239 }
7240
7241 /* Align start of frame for local function. When a function call
7242 is removed, it may become a leaf function. But if argument may
7243 be passed on stack, we need to align the stack when there is no
7244 tail call. */
7245 if (m->call_ms2sysv
7246 || frame->va_arg_size != 0
7247 || size != 0
7248 || !crtl->is_leaf
7249 || (!crtl->tail_call_emit
7250 && cfun->machine->outgoing_args_on_stack)
7251 || cfun->calls_alloca
7252 || ix86_current_function_calls_tls_descriptor)
7253 offset = ROUND_UP (offset, stack_alignment_needed);
7254
7255 /* Frame pointer points here. */
7256 frame->frame_pointer_offset = offset;
7257
7258 offset += size;
7259
7260 /* Add outgoing arguments area. Can be skipped if we eliminated
7261 all the function calls as dead code.
7262 Skipping is however impossible when function calls alloca. Alloca
7263 expander assumes that last crtl->outgoing_args_size
7264 of stack frame are unused. */
7265 if (ACCUMULATE_OUTGOING_ARGS
7266 && (!crtl->is_leaf || cfun->calls_alloca
7267 || ix86_current_function_calls_tls_descriptor))
7268 {
7269 offset += crtl->outgoing_args_size;
7270 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7271 }
7272 else
7273 frame->outgoing_arguments_size = 0;
7274
7275 /* Align stack boundary. Only needed if we're calling another function
7276 or using alloca. */
7277 if (!crtl->is_leaf || cfun->calls_alloca
7278 || ix86_current_function_calls_tls_descriptor)
7279 offset = ROUND_UP (offset, preferred_alignment);
7280
7281 /* We've reached end of stack frame. */
7282 frame->stack_pointer_offset = offset;
7283
7284 /* Size prologue needs to allocate. */
7285 to_allocate = offset - frame->sse_reg_save_offset;
7286
7287 if (save_regs_using_push_pop (to_allocate))
7288 frame->save_regs_using_mov = false;
7289
7290 if (ix86_using_red_zone ()
7291 && crtl->sp_is_unchanging
7292 && crtl->is_leaf
7293 && !cfun->machine->asm_redzone_clobber_seen
7294 && !ix86_pc_thunk_call_expanded
7295 && !ix86_current_function_calls_tls_descriptor)
7296 {
7297 frame->red_zone_size = to_allocate;
7298 if (frame->save_regs_using_mov)
7299 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7300 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7301 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7302 }
7303 else
7304 frame->red_zone_size = 0;
7305 frame->stack_pointer_offset -= frame->red_zone_size;
7306
7307 /* The SEH frame pointer location is near the bottom of the frame.
7308 This is enforced by the fact that the difference between the
7309 stack pointer and the frame pointer is limited to 240 bytes in
7310 the unwind data structure. */
7311 if (TARGET_SEH)
7312 {
7313 /* Force the frame pointer to point at or below the lowest register save
7314 area, see the SEH code in config/i386/winnt.cc for the rationale. */
7315 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7316
7317 /* If we can leave the frame pointer where it is, do so; however return
7318 the establisher frame for __builtin_frame_address (0) or else if the
7319 frame overflows the SEH maximum frame size.
7320
7321 Note that the value returned by __builtin_frame_address (0) is quite
7322 constrained, because setjmp is piggybacked on the SEH machinery with
7323 recent versions of MinGW:
7324
7325 # elif defined(__SEH__)
7326 # if defined(__aarch64__) || defined(_ARM64_)
7327 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7328 # elif (__MINGW_GCC_VERSION < 40702)
7329 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7330 # else
7331 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7332 # endif
7333
7334 and the second argument passed to _setjmp, if not null, is forwarded
7335 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7336 built an ExceptionRecord on the fly describing the setjmp buffer). */
7337 const HOST_WIDE_INT diff
7338 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7339 if (diff <= 255 && !crtl->accesses_prior_frames)
7340 {
7341 /* The resulting diff will be a multiple of 16 lower than 255,
7342 i.e. at most 240 as required by the unwind data structure. */
7343 frame->hard_frame_pointer_offset += (diff & 15);
7344 }
7345 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7346 {
7347 /* Ideally we'd determine what portion of the local stack frame
7348 (within the constraint of the lowest 240) is most heavily used.
7349 But without that complication, simply bias the frame pointer
7350 by 128 bytes so as to maximize the amount of the local stack
7351 frame that is addressable with 8-bit offsets. */
7352 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7353 }
7354 else
7355 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7356 }
7357}
7358
7359/* This is semi-inlined memory_address_length, but simplified
7360 since we know that we're always dealing with reg+offset, and
7361 to avoid having to create and discard all that rtl. */
7362
7363static inline int
7364choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7365{
7366 int len = 4;
7367
7368 if (offset == 0)
7369 {
7370 /* EBP and R13 cannot be encoded without an offset. */
7371 len = (regno == BP_REG || regno == R13_REG);
7372 }
7373 else if (IN_RANGE (offset, -128, 127))
7374 len = 1;
7375
7376 /* ESP and R12 must be encoded with a SIB byte. */
7377 if (regno == SP_REG || regno == R12_REG)
7378 len++;
7379
7380 return len;
7381}
7382
7383/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7384 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7385
7386static bool
7387sp_valid_at (HOST_WIDE_INT cfa_offset)
7388{
7389 const struct machine_frame_state &fs = cfun->machine->fs;
7390 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7391 {
7392 /* Validate that the cfa_offset isn't in a "no-man's land". */
7393 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7394 return false;
7395 }
7396 return fs.sp_valid;
7397}
7398
7399/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7400 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7401
7402static inline bool
7403fp_valid_at (HOST_WIDE_INT cfa_offset)
7404{
7405 const struct machine_frame_state &fs = cfun->machine->fs;
7406 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7407 {
7408 /* Validate that the cfa_offset isn't in a "no-man's land". */
7409 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7410 return false;
7411 }
7412 return fs.fp_valid;
7413}
7414
7415/* Choose a base register based upon alignment requested, speed and/or
7416 size. */
7417
static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_reqested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  /* Which of FP / DRAP / SP can address CFA - CFA_OFFSET right now.  */
  bool hfp_ok = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok = sp_valid_at (cfa_offset);

  /* Before any realignment, no candidate is better aligned than the
     incoming stack.  */
  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_reqested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_reqested;
      drap_ok = drap_ok && drap_align >= align_reqested;
      sp_ok = sp_ok && sp_align >= align_reqested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  /* The DRAP points at the CFA, so its offset from the CFA is 0.  */
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      /* LEN starts above any real encoding length so the first valid
	 candidate always wins.  */
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP: the candidates are tested in
	 the opposite order and later ones win on <=.  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, offset: base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), offset: toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, offset: toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	    }
	}
    }

  /* Set the align return value: report the alignment of whichever base
     register was selected above.  */
  if (align)
    {
      if (base_reg == stack_pointer_rtx)
	*align = sp_align;
      else if (base_reg == crtl->drap_reg)
	*align = drap_align;
      else if (base_reg == hard_frame_pointer_rtx)
	*align = hfp_align;
    }
}
7520
/* Return an RTX that points to CFA_OFFSET within the stack frame and
   the alignment of address.  If ALIGN is non-null, it should point to
   an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
   alignment value.  If it is possible for the base register offset to be
   non-immediate then SCRATCH_REGNO should specify a scratch register to
   use.

   The valid base registers are taken from CFUN->MACHINE->FS.  */
7531
static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
		 unsigned int scratch_regno = INVALID_REGNUM)
{
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  /* If a specific alignment is requested, try to get a base register
     with that alignment first.  */
  if (align && *align)
    choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: *align, align);

  /* Otherwise (or if the aligned attempt found nothing), accept any
     valid base register.  */
  if (!base_reg)
    choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: 0, align);

  gcc_assert (base_reg != NULL);

  rtx base_offset_rtx = GEN_INT (base_offset);

  /* An offset too wide for an addressing-mode immediate must first be
     loaded into the caller-provided scratch register.  */
  if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
    {
      gcc_assert (scratch_regno != INVALID_REGNUM);

      rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      emit_move_insn (scratch_reg, base_offset_rtx);

      return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
    }

  return plus_constant (Pmode, base_reg, base_offset);
}
7563
7564/* Emit code to save registers in the prologue. */
7565
static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx_insn *insn;
  /* APX PPX push/pop hints are not usable on the eh_return path.  */
  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;

  if (!TARGET_APX_PUSH2POP2
      || !ix86_can_use_push2pop2 ()
      || cfun->machine->func_type != TYPE_NORMAL)
    {
      /* Single-register pushes, walking from the highest regno down.  */
      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
	  {
	    insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno),
					ppx_p: use_ppx));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  else
    {
      /* Pair registers up and save them with PUSH2, which requires the
	 stack pointer to be 16-byte aligned at the push.  */
      int regno_list[2];
      regno_list[0] = regno_list[1] = -1;
      int loaded_regnum = 0;
      bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
	  {
	    if (aligned)
	      {
		regno_list[loaded_regnum++] = regno;
		if (loaded_regnum == 2)
		  {
		    gcc_assert (regno_list[0] != -1
				&& regno_list[1] != -1
				&& regno_list[0] != regno_list[1]);
		    const int offset = UNITS_PER_WORD * 2;
		    rtx mem = gen_rtx_MEM (TImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx));
		    insn = emit_insn (gen_push2 (mem,
						 reg1: gen_rtx_REG (word_mode,
							     regno_list[0]),
						 reg2: gen_rtx_REG (word_mode,
							     regno_list[1]),
						 ppx_p: use_ppx));
		    RTX_FRAME_RELATED_P (insn) = 1;
		    /* PUSH2 stores two registers with one insn, so spell
		       out both stores and the stack adjustment in a
		       REG_FRAME_RELATED_EXPR note for the unwinder.  */
		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));

		    for (int i = 0; i < 2; i++)
		      {
			rtx dwarf_reg = gen_rtx_REG (word_mode,
						     regno_list[i]);
			/* First listed register lands at the higher
			   address (SP + WORD), the second at SP.  */
			rtx sp_offset = plus_constant (Pmode,
						       stack_pointer_rtx,
						       + UNITS_PER_WORD
						       * (1 - i));
			rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
							      sp_offset),
					       dwarf_reg);
			RTX_FRAME_RELATED_P (tmp) = 1;
			XVECEXP (dwarf, 0, i + 1) = tmp;
		      }
		    rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
					      plus_constant (Pmode,
							     stack_pointer_rtx,
							     -offset));
		    RTX_FRAME_RELATED_P (sp_tmp) = 1;
		    XVECEXP (dwarf, 0, 0) = sp_tmp;
		    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

		    loaded_regnum = 0;
		    regno_list[0] = regno_list[1] = -1;
		  }
	      }
	    else
	      {
		/* Stack is misaligned: emit one plain push, after which
		   the stack pointer is 16-byte aligned again.  */
		insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno),
					    ppx_p: use_ppx));
		RTX_FRAME_RELATED_P (insn) = 1;
		aligned = true;
	      }
	  }
      /* An odd register left over after pairing gets a plain push.  */
      if (loaded_regnum == 1)
	{
	  insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode,
						 regno_list[0]),
				      ppx_p: use_ppx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
7659
7660/* Emit a single register save at CFA - CFA_OFFSET. */
7661
static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Pick the best base register to address CFA - CFA_OFFSET; ALIGN
     comes back as the alignment of the chosen base.  */
  addr = choose_baseaddr (cfa_offset, align: &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  /* Saves relative to a realigned stack pointer need an expression
     note too, since the SP is not a fixed distance from the CFA.  */
  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
7734
7735/* Emit code to save registers using MOV insns.
7736 First register is stored at CFA - CFA_OFFSET. */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
      {
	/* Skip registers, already processed by shrink wrap separate.  */
	if (!cfun->machine->reg_is_wrapped_separately[regno])
	  ix86_emit_save_reg_using_mov (mode: word_mode, regno, cfa_offset);
	/* The slot is still accounted for even when the save is skipped,
	   so successive registers keep their fixed offsets.  */
	cfa_offset -= UNITS_PER_WORD;
      }
}
7751
7752/* Emit code to save SSE registers using MOV insns.
7753 First register is stored at CFA - CFA_OFFSET. */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  /* SSE registers are saved as 16-byte V4SFmode slots, each at a fixed
     offset below the previous one.  */
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
7766
7767static GTY(()) rtx queued_cfa_restores;
7768
7769/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7770 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7771 Don't add the note if the previously saved value will be left untouched
7772 within stack red-zone till return, as unwinders can find the same value
7773 in the register and on the stack. */
7774
7775static void
7776ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7777{
7778 if (!crtl->shrink_wrapped
7779 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7780 return;
7781
7782 if (insn)
7783 {
7784 add_reg_note (insn, REG_CFA_RESTORE, reg);
7785 RTX_FRAME_RELATED_P (insn) = 1;
7786 }
7787 else
7788 queued_cfa_restores
7789 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7790}
7791
7792/* Add queued REG_CFA_RESTORE notes if any to INSN. */
7793
7794static void
7795ix86_add_queued_cfa_restore_notes (rtx insn)
7796{
7797 rtx last;
7798 if (!queued_cfa_restores)
7799 return;
7800 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7801 ;
7802 XEXP (last, 1) = REG_NOTES (insn);
7803 REG_NOTES (insn) = queued_cfa_restores;
7804 queued_cfa_restores = NULL_RTX;
7805 RTX_FRAME_RELATED_P (insn) = 1;
7806}
7807
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */
7813
static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx insn;
  bool add_frame_related_expr = false;

  /* An offset too wide for an immediate must be loaded into a helper
     register first.  */
  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  /* STYLE == 0 means r11 is live, so fall back to clobbering the
	     hard frame pointer — which therefore must not be an operand.  */
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  /* Shrink wrap separate may insert prologue between TEST and JMP.  In order
     not to affect EFlags, emit add without reg clobbering.  */
  if (crtl->shrink_wrapped_separate)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
		      (Pmode, x0: dest, x1: src, x2: addend));
  else
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		      (Pmode, x0: dest, x1: src, x2: addend));

  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      /* Moving the CFA register: record the adjustment for the unwinder.  */
      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  /* The offset went through a helper register, so describe the
	     net effect explicitly for dwarf2out.  */
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  /* Track the frame state: adjusting the stack pointer inherits
     validity/offset from whichever register it was set from.  */
  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
7909
/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in prologue and
   used at the beginning of the body, so it must not be
	1. parameter passing register.
	2. GOT pointer.
   We reuse static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of chosen register.  */
7920
7921static unsigned int
7922find_drap_reg (void)
7923{
7924 tree decl = cfun->decl;
7925
7926 /* Always use callee-saved register if there are no caller-saved
7927 registers. */
7928 if (TARGET_64BIT)
7929 {
7930 /* Use R13 for nested function or function need static chain.
7931 Since function with tail call may use any caller-saved
7932 registers in epilogue, DRAP must not use caller-saved
7933 register in such case. */
7934 if (DECL_STATIC_CHAIN (decl)
7935 || (cfun->machine->call_saved_registers
7936 == TYPE_NO_CALLER_SAVED_REGISTERS)
7937 || crtl->tail_call_emit)
7938 return R13_REG;
7939
7940 return R10_REG;
7941 }
7942 else
7943 {
7944 /* Use DI for nested function or function need static chain.
7945 Since function with tail call may use any caller-saved
7946 registers in epilogue, DRAP must not use caller-saved
7947 register in such case. */
7948 if (DECL_STATIC_CHAIN (decl)
7949 || (cfun->machine->call_saved_registers
7950 == TYPE_NO_CALLER_SAVED_REGISTERS)
7951 || crtl->tail_call_emit
7952 || crtl->calls_eh_return)
7953 return DI_REG;
7954
7955 /* Reuse static chain register if it isn't used for parameter
7956 passing. */
7957 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7958 {
7959 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7960 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7961 return CX_REG;
7962 }
7963 return DI_REG;
7964 }
7965}
7966
7967/* Return minimum incoming stack alignment. */
7968
static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
  if (cfun->machine->func_type != TYPE_NORMAL)
    incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
  /* Prefer the one specified at command line.  */
  else if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (attr_name: "force_align_arg_pointer",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
8013
8014/* Update incoming stack boundary and estimated stack alignment. */
8015
static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (sibcall: false);

  /* x86_64 vararg needs 16byte stack alignment for register save area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;

  /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
  if (ix86_tls_descriptor_calls_expanded_in_cfun
      && crtl->preferred_stack_boundary < 128)
    crtl->preferred_stack_boundary = 128;

  /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
     are 32 bits, but if force_align_arg_pointer is specified, it should
     prefer 128 bits for a backward-compatibility reason, which is also
     what the doc suggests.  */
  if (lookup_attribute (attr_name: "force_align_arg_pointer",
			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
      && crtl->preferred_stack_boundary < 128)
    crtl->preferred_stack_boundary = 128;
}
8042
8043/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8044 needed or an rtx for DRAP otherwise. */
8045
static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack or
     the stack pointer register is clobbered by asm statement and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || ((cfun->machine->outgoing_args_on_stack
	   || crtl->sp_is_clobbered_by_asm)
	  && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      /* Copy the hard DRAP register into a pseudo right after the
	 function entry point.  */
      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (insn: entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
8084
8085/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8086
static rtx
ix86_internal_arg_pointer (void)
{
  /* Always use the virtual incoming-arguments pointer; DRAP-based
     realignment is handled separately via TARGET_GET_DRAP_RTX.  */
  return virtual_incoming_args_rtx;
}
8092
/* Descriptor for a scratch register obtained on function entry.  */
struct scratch_reg {
  rtx reg;	/* The hard register chosen as scratch.  */
  bool saved;	/* True if REG had to be pushed to free it up.  */
};
8097
8098/* Return a short-lived scratch register for use on function entry.
8099 In 32-bit mode, it is valid only after the registers are saved
8100 in the prologue. This register must be released by means of
8101 release_scratch_register_on_entry once it is dead. */
8102
static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      /* In 32-bit mode, look for a register that is not used for
	 parameter passing, the static chain, or the DRAP.  */
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute (attr_name: "fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute (attr_name: "thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (type: fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      /* Otherwise try a callee-saved register that is saved anyway.  */
      else if (ix86_save_reg (BX_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = DI_REG;
      else
	{
	  /* No free register: spill one around the use (avoiding the
	     DRAP register if it is eax).  */
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (arg: sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
8165
8166/* Release a scratch register obtained from the preceding function.
8167
8168 If RELEASE_VIA_POP is true, we just pop the register off the stack
8169 to release it. This is what non-Linux systems use with -fstack-check.
8170
8171 Otherwise we use OFFSET to locate the saved register and the
8172 allocated stack space becomes part of the local frame and is
8173 deallocated by the epilogue. */
8174
static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
				   bool release_via_pop)
{
  if (sr->saved)
    {
      if (release_via_pop)
	{
	  struct machine_function *m = cfun->machine;
	  rtx x, insn = emit_insn (gen_pop (arg: sr->reg));

	  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop, so
	     describe the stack adjustment explicitly.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
	  m->fs.sp_offset -= UNITS_PER_WORD;
	}
      else
	{
	  /* Reload the saved value from its slot at SP + OFFSET; the
	     slot itself is deallocated by the epilogue.  */
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
	  emit_insn (x);
	}
    }
}
8201
8202/* Emit code to adjust the stack pointer by SIZE bytes while probing it.
8203
8204 If INT_REGISTERS_SAVED is true, then integer registers have already been
8205 pushed on the stack.
8206
8207 If PROTECTION AREA is true, then probe PROBE_INTERVAL plus a small dope
8208 beyond SIZE bytes.
8209
8210 This assumes no knowledge of the current probing state, i.e. it is never
8211 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
8212 a suitable probe. */
8213
static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved,
			     const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (arg: dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (arg: dummy_reg));
      /* The push/pop pair nets out to zero stack adjustment, but the push
	 touches the word just below the incoming stack pointer, which is
	 exactly the probe we need.  gen_push bumped the tracked offsets;
	 since the pop restores SP, undo that bookkeeping here.  */
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* Describe both adjustments to the unwinder so the CFA stays
	     correct between the push and the pop.  */
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }

  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  /* Extra slack allocated together with the protection area and given
     back at the end (see the final adjustment below).  */
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), style: -1,
				 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), style: -1,
					 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual.  Note RESIDUAL is zero or negative here
	 (I overshot SIZE by at most one interval), so passing it to the
	 stack adjustment below allocates the remaining bytes...  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (residual), style: -1,
				     set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

	  /* ...except if there is a protection area to maintain.  */
	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (sr: &sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* While the loop runs, the CFA is expressed relative to the
	     scratch register holding the final stack pointer value.  */
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, x0: sr.reg, x1: sr.reg,
						    x2: size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* After the loop the stack pointer is fully adjusted; switch
	     the CFA back to being SP-relative.  */
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (rounded_size - size), style: -1,
				     set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (probe_interval + dope), style: -1,
			       set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8419
/* Adjust the stack pointer up to REG while probing it.  Emits the asm
   for the runtime loop generated by ix86_adjust_stack_and_probe.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP - PROBE_INTERVAL.  The stack grows downward, so advancing
     the allocation by one probe interval is a subtraction.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  An "or $0" is a harmless read-modify-write that
     still faults if the page is not mapped.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs (s: "\tjne\t", stream: asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc (c: '\n', stream: asm_out_file);

  return "";
}
8455
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (sr: &sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.
	 The scratch register holds the NEGATED offset so that probe
	 addresses can be formed as SP + reg.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn
	(gen_probe_stack_range (Pmode, x0: sr.reg, x1: sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8552
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  Emits the asm for the runtime
   loop generated by ix86_emit_probe_stack_range; REG holds the negated
   probe offset (see Step 2 there), so advancing it is a subtraction.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  REG is negated, so the
     abstract "+ PROBE_INTERVAL" is a sub on the register.  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR = SP + REG, with a harmless "or $0".  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs (s: "\tjne\t", stream: asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc (c: '\n', stream: asm_out_file);

  return "";
}
8591
/* Data passed to ix86_update_stack_alignment.  */
struct stack_access_data
{
  /* The stack access register: the stack pointer, the frame pointer,
     or a register derived from one of them.  */
  const_rtx reg;
  /* Pointer to the running maximum stack slot alignment, in bits;
     updated in place by ix86_update_stack_alignment.  */
  unsigned int *stack_alignment;
};
8600
/* Update the maximum stack slot alignment from memory alignment in PAT.
   Callback for note_stores; DATA is a stack_access_data.  */

static void
ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
{
  /* This insn may reference stack slot.  Update the maximum stack slot
     alignment if the memory is referenced by the stack access register.  */
  stack_access_data *p = (stack_access_data *) data;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, pat, ALL)
    {
      auto op = *iter;
      if (MEM_P (op))
	{
	  if (reg_mentioned_p (p->reg, XEXP (op, 0)))
	    {
	      /* The address uses the stack access register: raise the
		 running maximum if this MEM is more aligned.  */
	      unsigned int alignment = MEM_ALIGN (op);

	      if (alignment > *p->stack_alignment)
		*p->stack_alignment = alignment;
	      break;
	    }
	  else
	    /* This MEM's address does not mention the tracked register;
	       do not descend into its sub-expressions.  */
	    iter.skip_subrtxes ();
	}
    }
}
8629
/* Helper function for ix86_find_all_reg_uses.  If SET copies or computes
   a Pmode register from REGNO, add the destination register to REGSET and
   queue it on WORKLIST so its own uses get scanned too.  */

static void
ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
			  rtx set, unsigned int regno,
			  auto_bitmap &worklist)
{
  rtx dest = SET_DEST (set);

  /* Only register destinations can propagate a stack address.  */
  if (!REG_P (dest))
    return;

  /* Reject non-Pmode modes.  */
  if (GET_MODE (dest) != Pmode)
    return;

  unsigned int dst_regno = REGNO (dest);

  /* Already known to derive from the tracked register; nothing to do.  */
  if (TEST_HARD_REG_BIT (set: regset, bit: dst_regno))
    return;

  const_rtx src = SET_SRC (set);

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, src, ALL)
    {
      auto op = *iter;

      /* A value loaded from memory is not a direct register copy;
	 do not look inside MEMs.  */
      if (MEM_P (op))
	iter.skip_subrtxes ();

      if (REG_P (op) && REGNO (op) == regno)
	{
	  /* Add this register to register set.  */
	  add_to_hard_reg_set (regs: &regset, Pmode, regno: dst_regno);
	  bitmap_set_bit (worklist, dst_regno);
	  break;
	}
    }
}
8670
/* Find all registers defined with register REGNO.  Walks the df use
   chain of REGNO and records, via ix86_find_all_reg_uses_1, every
   register set from it into REGSET/WORKLIST.  */

static void
ix86_find_all_reg_uses (HARD_REG_SET &regset,
			unsigned int regno, auto_bitmap &worklist)
{
  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref != NULL;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Artificial uses (e.g. block boundaries) carry no insn.  */
      if (DF_REF_IS_ARTIFICIAL (ref))
	continue;

      rtx_insn *insn = DF_REF_INSN (ref);

      if (!NONJUMP_INSN_P (insn))
	continue;

      unsigned int ref_regno = DF_REF_REGNO (ref);

      rtx set = single_set (insn);
      if (set)
	{
	  ix86_find_all_reg_uses_1 (regset, set,
				    regno: ref_regno, worklist);
	  continue;
	}

      /* Not a single_set: scan each SET inside a PARALLEL.  */
      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) != PARALLEL)
	continue;

      for (int i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx exp = XVECEXP (pat, 0, i);

	  if (GET_CODE (exp) == SET)
	    ix86_find_all_reg_uses_1 (regset, set: exp,
				      regno: ref_regno, worklist);
	}
    }
}
8713
/* Set stack_frame_required to false if stack frame isn't required.
   Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
   slot used if stack frame is required and CHECK_STACK_SLOT is true.  */

static void
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
				    bool check_stack_slot)
{
  HARD_REG_SET set_up_by_prologue, prologue_used;
  basic_block bb;

  CLEAR_HARD_REG_SET (set&: prologue_used);
  CLEAR_HARD_REG_SET (set&: set_up_by_prologue);
  add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
  add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode,
		       HARD_FRAME_POINTER_REGNUM);

  /* A stack frame is required as soon as any insn needs one.  */
  bool require_stack_frame = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue))
	  {
	    require_stack_frame = true;
	    break;
	  }
    }

  cfun->machine->stack_frame_required = require_stack_frame;

  /* Stop if we don't need to check stack slot.  */
  if (!check_stack_slot)
    return;

  /* The preferred stack alignment is the minimum stack alignment.  */
  if (stack_alignment > crtl->preferred_stack_boundary)
    stack_alignment = crtl->preferred_stack_boundary;

  HARD_REG_SET stack_slot_access;
  CLEAR_HARD_REG_SET (set&: stack_slot_access);

  /* Stack slot can be accessed by stack pointer, frame pointer or
     registers defined by stack pointer or frame pointer.  */
  auto_bitmap worklist;

  add_to_hard_reg_set (regs: &stack_slot_access, Pmode, STACK_POINTER_REGNUM);
  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);

  if (frame_pointer_needed)
    {
      add_to_hard_reg_set (regs: &stack_slot_access, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
    }

  unsigned int regno;

  /* Transitive closure: keep scanning until no new register derived
     from SP/FP is discovered.  */
  do
    {
      regno = bitmap_clear_first_set_bit (worklist);
      ix86_find_all_reg_uses (regset&: stack_slot_access, regno, worklist);
    }
  while (!bitmap_empty_p (map: worklist));

  hard_reg_set_iterator hrsi;
  stack_access_data data;

  data.stack_alignment = &stack_alignment;

  /* For every register that can address a stack slot, inspect each insn
     using it and fold MEM alignments into STACK_ALIGNMENT.  */
  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
    for (df_ref ref = DF_REG_USE_CHAIN (regno);
	 ref != NULL;
	 ref = DF_REF_NEXT_REG (ref))
      {
	if (DF_REF_IS_ARTIFICIAL (ref))
	  continue;

	rtx_insn *insn = DF_REF_INSN (ref);

	if (!NONJUMP_INSN_P (insn))
	  continue;

	data.reg = DF_REF_REG (ref);
	note_stores (insn, ix86_update_stack_alignment, &data);
      }
}
8805
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
   will guide prologue/epilogue to be generated in correct form.  */

static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     stores result in cfun */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if 128-bit aligned load/store may be generated
     on misaligned stack slot which will lead to segfault.  */
  bool check_stack_slot
    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
  ix86_find_max_used_stack_alignment (stack_alignment,
				      check_stack_slot);

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment had
     been spilled nor stack access, clear frame_pointer_needed and say we
     don't need stack realignment.

     When vector register is used for piecewise move and store, we don't
     increase stack_alignment_needed as there is no register spill for
     piecewise move and store.  Since stack_realign_needed is set to true
     by checking stack_alignment_estimated which is updated by pseudo
     vector register usage, we also need to check stack_realign_needed to
     eliminate frame pointer.  */
  if ((stack_realign
       || (!flag_omit_frame_pointer && optimize)
       || crtl->stack_realign_needed)
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
	   && flag_stack_check
	   && flag_exceptions
	   && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && ix86_get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      if (cfun->machine->stack_frame_required)
	{
	  /* Stack frame is required.  If stack alignment needed is less
	     than incoming stack boundary, don't realign stack.  */
	  stack_realign = incoming_stack_boundary < stack_alignment;
	  if (!stack_realign)
	    {
	      crtl->max_used_stack_slot_alignment
		= incoming_stack_boundary;
	      crtl->stack_alignment_needed
		= incoming_stack_boundary;
	      /* Also update preferred_stack_boundary for leaf
	         functions.  */
	      crtl->preferred_stack_boundary
		= incoming_stack_boundary;
	    }
	}
      else
	{
	  /* If drap has been set, but it actually isn't live at the
	     start of the function, there is no reason to set it up.  */
	  if (crtl->drap_reg)
	    {
	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
				     REGNO (crtl->drap_reg)))
		{
		  crtl->drap_reg = NULL_RTX;
		  crtl->need_drap = false;
		}
	    }
	  else
	    cfun->machine->no_drap_save_restore = true;

	  frame_pointer_needed = false;
	  stack_realign = false;
	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
	  crtl->stack_alignment_needed = incoming_stack_boundary;
	  crtl->stack_alignment_estimated = incoming_stack_boundary;
	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	    crtl->preferred_stack_boundary = incoming_stack_boundary;
	  /* Eliminating the frame pointer invalidates the dataflow
	     information; rebuild it from scratch.  */
	  df_finish_pass (true);
	  df_scan_alloc (NULL);
	  df_scan_blocks ();
	  df_compute_regs_ever_live (true);
	  df_analyze ();

	  if (flag_var_tracking)
	    {
	      /* Since frame pointer is no longer available, replace it with
		 stack pointer - UNITS_PER_WORD in debug insns.  */
	      df_ref ref, next;
	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
		   ref; ref = next)
		{
		  next = DF_REF_NEXT_REG (ref);
		  if (!DF_REF_INSN_INFO (ref))
		    continue;

		  /* Make sure the next ref is for a different instruction,
		     so that we're not affected by the rescan.  */
		  rtx_insn *insn = DF_REF_INSN (ref);
		  while (next && DF_REF_INSN (next) == insn)
		    next = DF_REF_NEXT_REG (next);

		  if (DEBUG_INSN_P (insn))
		    {
		      bool changed = false;
		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
			{
			  rtx *loc = DF_REF_LOC (ref);
			  if (*loc == hard_frame_pointer_rtx)
			    {
			      *loc = plus_constant (Pmode,
						    stack_pointer_rtx,
						    -UNITS_PER_WORD);
			      changed = true;
			    }
			}
		      if (changed)
			df_insn_rescan (insn);
		    }
		}
	    }

	  recompute_frame_layout_p = true;
	}
    }
  else if (crtl->max_used_stack_slot_alignment >= 128
	   && cfun->machine->stack_frame_required)
    {
      /* We don't need to realign stack.  max_used_stack_alignment is
	 used to decide how stack frame should be aligned.  This is
	 independent of any psABIs nor 32-bit vs 64-bit.  */
      cfun->machine->max_used_stack_alignment
	= stack_alignment / BITS_PER_UNIT;
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
8978
/* Delete SET_GOT right after entry block if it is allocated to reg.  */

static void
ix86_elim_entry_set_got (rtx reg)
{
  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
  rtx_insn *c_insn = BB_HEAD (bb);
  /* Skip notes/labels at the head of the block to reach the first
     real insn.  */
  if (!NONDEBUG_INSN_P (c_insn))
    c_insn = next_nonnote_nondebug_insn (c_insn);
  if (c_insn && NONJUMP_INSN_P (c_insn))
    {
      rtx pat = PATTERN (insn: c_insn);
      if (GET_CODE (pat) == PARALLEL)
	{
	  /* The SET_GOT pattern is a PARALLEL whose first element is the
	     SET; verify it sets REG from an UNSPEC_SET_GOT source.  */
	  rtx set = XVECEXP (pat, 0, 0);
	  if (GET_CODE (set) == SET
	      && GET_CODE (SET_SRC (set)) == UNSPEC
	      && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
	      && REGNO (SET_DEST (set)) == REGNO (reg))
	    delete_insn (c_insn);
	}
    }
}
9002
9003static rtx
9004gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
9005{
9006 rtx addr, mem;
9007
9008 if (offset)
9009 addr = plus_constant (Pmode, frame_reg, offset);
9010 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
9011 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
9012}
9013
9014static inline rtx
9015gen_frame_load (rtx reg, rtx frame_reg, int offset)
9016{
9017 return gen_frame_set (reg, frame_reg, offset, store: false);
9018}
9019
9020static inline rtx
9021gen_frame_store (rtx reg, rtx frame_reg, int offset)
9022{
9023 return gen_frame_set (reg, frame_reg, offset, store: true);
9024}
9025
/* Emit the out-of-line ms2sysv register-save sequence: set up RAX as the
   stub's base pointer and emit a single PARALLEL containing the stub USE
   plus one frame store per clobbered register.  */
static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
     rather than a frame-pointer-relative offset so the address is valid
     whether or not we've actually realigned the stack.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (cfa_offset: frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), align: &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
						  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  /* One frame store per clobbered register, addressed off RAX.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, frame_reg: rax, offset: -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
9070
9071/* Generate and return an insn body to AND X with Y. */
9072
9073static rtx_insn *
9074gen_and2_insn (rtx x, rtx y)
9075{
9076 enum insn_code icode = optab_handler (op: and_optab, GET_MODE (x));
9077
9078 gcc_assert (insn_operand_matches (icode, 0, x));
9079 gcc_assert (insn_operand_matches (icode, 1, x));
9080 gcc_assert (insn_operand_matches (icode, 2, y));
9081
9082 return GEN_FCN (icode) (x, x, y);
9083}
9084
9085/* Expand the prologue into a bunch of separate insns. */
9086
9087void
9088ix86_expand_prologue (void)
9089{
9090 struct machine_function *m = cfun->machine;
9091 rtx insn, t;
9092 HOST_WIDE_INT allocate;
9093 bool int_registers_saved;
9094 bool sse_registers_saved;
9095 bool save_stub_call_needed;
9096 rtx static_chain = NULL_RTX;
9097
9098 ix86_last_zero_store_uid = 0;
9099 if (ix86_function_naked (fn: current_function_decl))
9100 {
9101 if (flag_stack_usage_info)
9102 current_function_static_stack_size = 0;
9103 return;
9104 }
9105
9106 ix86_finalize_stack_frame_flags ();
9107
9108 /* DRAP should not coexist with stack_realign_fp */
9109 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9110
9111 memset (s: &m->fs, c: 0, n: sizeof (m->fs));
9112
9113 /* Initialize CFA state for before the prologue. */
9114 m->fs.cfa_reg = stack_pointer_rtx;
9115 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9116
9117 /* Track SP offset to the CFA. We continue tracking this after we've
9118 swapped the CFA register away from SP. In the case of re-alignment
9119 this is fudged; we're interested to offsets within the local frame. */
9120 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9121 m->fs.sp_valid = true;
9122 m->fs.sp_realigned = false;
9123
9124 const struct ix86_frame &frame = cfun->machine->frame;
9125
9126 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (fn: current_function_decl))
9127 {
9128 /* We should have already generated an error for any use of
9129 ms_hook on a nested function. */
9130 gcc_checking_assert (!ix86_static_chain_on_stack);
9131
9132 /* Check if profiling is active and we shall use profiling before
9133 prologue variant. If so sorry. */
9134 if (crtl->profile && flag_fentry != 0)
9135 sorry ("%<ms_hook_prologue%> attribute is not compatible "
9136 "with %<-mfentry%> for 32-bit");
9137
9138 /* In ix86_asm_output_function_label we emitted:
9139 8b ff movl.s %edi,%edi
9140 55 push %ebp
9141 8b ec movl.s %esp,%ebp
9142
9143 This matches the hookable function prologue in Win32 API
9144 functions in Microsoft Windows XP Service Pack 2 and newer.
9145 Wine uses this to enable Windows apps to hook the Win32 API
9146 functions provided by Wine.
9147
9148 What that means is that we've already set up the frame pointer. */
9149
9150 if (frame_pointer_needed
9151 && !(crtl->drap_reg && crtl->stack_realign_needed))
9152 {
9153 rtx push, mov;
9154
9155 /* We've decided to use the frame pointer already set up.
9156 Describe this to the unwinder by pretending that both
9157 push and mov insns happen right here.
9158
9159 Putting the unwind info here at the end of the ms_hook
9160 is done so that we can make absolutely certain we get
9161 the required byte sequence at the start of the function,
9162 rather than relying on an assembler that can produce
9163 the exact encoding required.
9164
9165 However it does mean (in the unpatched case) that we have
9166 a 1 insn window where the asynchronous unwind info is
9167 incorrect. However, if we placed the unwind info at
9168 its correct location we would have incorrect unwind info
9169 in the patched case. Which is probably all moot since
9170 I don't expect Wine generates dwarf2 unwind info for the
9171 system libraries that use this feature. */
9172
9173 insn = emit_insn (gen_blockage ());
9174
9175 push = gen_push (hard_frame_pointer_rtx);
9176 mov = gen_rtx_SET (hard_frame_pointer_rtx,
9177 stack_pointer_rtx);
9178 RTX_FRAME_RELATED_P (push) = 1;
9179 RTX_FRAME_RELATED_P (mov) = 1;
9180
9181 RTX_FRAME_RELATED_P (insn) = 1;
9182 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9183 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9184
9185 /* Note that gen_push incremented m->fs.cfa_offset, even
9186 though we didn't emit the push insn here. */
9187 m->fs.cfa_reg = hard_frame_pointer_rtx;
9188 m->fs.fp_offset = m->fs.cfa_offset;
9189 m->fs.fp_valid = true;
9190 }
9191 else
9192 {
9193 /* The frame pointer is not needed so pop %ebp again.
9194 This leaves us with a pristine state. */
9195 emit_insn (gen_pop (hard_frame_pointer_rtx));
9196 }
9197 }
9198
9199 /* The first insn of a function that accepts its static chain on the
9200 stack is to push the register that would be filled in by a direct
9201 call. This insn will be skipped by the trampoline. */
9202 else if (ix86_static_chain_on_stack)
9203 {
9204 static_chain = ix86_static_chain (cfun->decl, false);
9205 insn = emit_insn (gen_push (arg: static_chain));
9206 emit_insn (gen_blockage ());
9207
9208 /* We don't want to interpret this push insn as a register save,
9209 only as a stack adjustment. The real copy of the register as
9210 a save will be done later, if needed. */
9211 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
9212 t = gen_rtx_SET (stack_pointer_rtx, t);
9213 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9214 RTX_FRAME_RELATED_P (insn) = 1;
9215 }
9216
9217 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
9218 of DRAP is needed and stack realignment is really needed after reload */
9219 if (stack_realign_drap)
9220 {
9221 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9222
9223 /* Can't use DRAP in interrupt function. */
9224 if (cfun->machine->func_type != TYPE_NORMAL)
9225 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
9226 "in interrupt service routine. This may be worked "
9227 "around by avoiding functions with aggregate return.");
9228
9229 /* Only need to push parameter pointer reg if it is caller saved. */
9230 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9231 {
9232 /* Push arg pointer reg */
9233 insn = emit_insn (gen_push (crtl->drap_reg));
9234 RTX_FRAME_RELATED_P (insn) = 1;
9235 }
9236
9237 /* Grab the argument pointer. */
9238 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
9239 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9240 RTX_FRAME_RELATED_P (insn) = 1;
9241 m->fs.cfa_reg = crtl->drap_reg;
9242 m->fs.cfa_offset = 0;
9243
9244 /* Align the stack. */
9245 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
9246 GEN_INT (-align_bytes)));
9247 RTX_FRAME_RELATED_P (insn) = 1;
9248
9249 /* Replicate the return address on the stack so that return
9250 address can be reached via (argp - 1) slot. This is needed
9251 to implement macro RETURN_ADDR_RTX and intrinsic function
9252 expand_builtin_return_addr etc. */
9253 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
9254 t = gen_frame_mem (word_mode, t);
9255 insn = emit_insn (gen_push (arg: t));
9256 RTX_FRAME_RELATED_P (insn) = 1;
9257
9258 /* For the purposes of frame and register save area addressing,
9259 we've started over with a new frame. */
9260 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9261 m->fs.realigned = true;
9262
9263 if (static_chain)
9264 {
9265 /* Replicate static chain on the stack so that static chain
9266 can be reached via (argp - 2) slot. This is needed for
9267 nested function with stack realignment. */
9268 insn = emit_insn (gen_push (arg: static_chain));
9269 RTX_FRAME_RELATED_P (insn) = 1;
9270 }
9271 }
9272
9273 int_registers_saved = (frame.nregs == 0);
9274 sse_registers_saved = (frame.nsseregs == 0);
9275 save_stub_call_needed = (m->call_ms2sysv);
9276 gcc_assert (sse_registers_saved || !save_stub_call_needed);
9277
9278 if (frame_pointer_needed && !m->fs.fp_valid)
9279 {
9280 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9281 slower on all targets. Also sdb didn't like it. */
9282 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9283 RTX_FRAME_RELATED_P (insn) = 1;
9284
9285 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9286 {
9287 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9288 RTX_FRAME_RELATED_P (insn) = 1;
9289
9290 if (m->fs.cfa_reg == stack_pointer_rtx)
9291 m->fs.cfa_reg = hard_frame_pointer_rtx;
9292 m->fs.fp_offset = m->fs.sp_offset;
9293 m->fs.fp_valid = true;
9294 }
9295 }
9296
9297 if (!int_registers_saved)
9298 {
9299 /* If saving registers via PUSH, do so now. */
9300 if (!frame.save_regs_using_mov)
9301 {
9302 ix86_emit_save_regs ();
9303 m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
9304 int_registers_saved = true;
9305 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9306 }
9307
9308 /* When using red zone we may start register saving before allocating
9309 the stack frame saving one cycle of the prologue. However, avoid
9310 doing this if we have to probe the stack; at least on x86_64 the
9311 stack probe can turn into a call that clobbers a red zone location. */
9312 else if (ix86_using_red_zone ()
9313 && (! TARGET_STACK_PROBE
9314 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9315 {
9316 HOST_WIDE_INT allocate_offset;
9317 if (crtl->shrink_wrapped_separate)
9318 {
9319 allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
9320
9321 /* Adjust the total offset at the beginning of the function. */
9322 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9323 GEN_INT (allocate_offset), style: -1,
9324 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
9325 m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
9326 }
9327
9328 ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset);
9329 int_registers_saved = true;
9330 }
9331 }
9332
9333 if (frame.red_zone_size != 0)
9334 cfun->machine->red_zone_used = true;
9335
9336 if (stack_realign_fp)
9337 {
9338 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9339 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9340
9341 /* Record last valid frame pointer offset. */
9342 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
9343
9344 /* The computation of the size of the re-aligned stack frame means
9345 that we must allocate the size of the register save area before
9346 performing the actual alignment. Otherwise we cannot guarantee
9347 that there's enough storage above the realignment point. */
9348 allocate = frame.reg_save_offset - m->fs.sp_offset
9349 + frame.stack_realign_allocate;
9350 if (allocate)
9351 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9352 GEN_INT (-allocate), style: -1, set_cfa: false);
9353
9354 /* Align the stack. */
9355 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
9356 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
9357 m->fs.sp_realigned_offset = m->fs.sp_offset
9358 - frame.stack_realign_allocate;
9359 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
9360 Beyond this point, stack access should be done via choose_baseaddr or
9361 by using sp_valid_at and fp_valid_at to determine the correct base
9362 register. Henceforth, any CFA offset should be thought of as logical
9363 and not physical. */
9364 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
9365 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
9366 m->fs.sp_realigned = true;
9367
9368 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
9369 is needed to describe where a register is saved using a realigned
9370 stack pointer, so we need to invalidate the stack pointer for that
9371 target. */
9372 if (TARGET_SEH)
9373 m->fs.sp_valid = false;
9374
9375 /* If SP offset is non-immediate after allocation of the stack frame,
9376 then emit SSE saves or stub call prior to allocating the rest of the
9377 stack frame. This is less efficient for the out-of-line stub because
9378 we can't combine allocations across the call barrier, but it's better
9379 than using a scratch register. */
9380 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
9381 - m->fs.sp_realigned_offset),
9382 Pmode))
9383 {
9384 if (!sse_registers_saved)
9385 {
9386 ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset);
9387 sse_registers_saved = true;
9388 }
9389 else if (save_stub_call_needed)
9390 {
9391 ix86_emit_outlined_ms2sysv_save (frame);
9392 save_stub_call_needed = false;
9393 }
9394 }
9395 }
9396
9397 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9398
9399 if (flag_stack_usage_info)
9400 {
9401 /* We start to count from ARG_POINTER. */
9402 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9403
9404 /* If it was realigned, take into account the fake frame. */
9405 if (stack_realign_drap)
9406 {
9407 if (ix86_static_chain_on_stack)
9408 stack_size += UNITS_PER_WORD;
9409
9410 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9411 stack_size += UNITS_PER_WORD;
9412
9413 /* This over-estimates by 1 minimal-stack-alignment-unit but
9414 mitigates that by counting in the new return address slot. */
9415 current_function_dynamic_stack_size
9416 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9417 }
9418
9419 current_function_static_stack_size = stack_size;
9420 }
9421
9422 /* On SEH target with very large frame size, allocate an area to save
9423 SSE registers (as the very large allocation won't be described). */
9424 if (TARGET_SEH
9425 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
9426 && !sse_registers_saved)
9427 {
9428 HOST_WIDE_INT sse_size
9429 = frame.sse_reg_save_offset - frame.reg_save_offset;
9430
9431 gcc_assert (int_registers_saved);
9432
9433 /* No need to do stack checking as the area will be immediately
9434 written. */
9435 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9436 GEN_INT (-sse_size), style: -1,
9437 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
9438 allocate -= sse_size;
9439 ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset);
9440 sse_registers_saved = true;
9441 }
9442
9443 /* If stack clash protection is requested, then probe the stack, unless it
9444 is already probed on the target. */
9445 if (allocate >= 0
9446 && flag_stack_clash_protection
9447 && !ix86_target_stack_probe ())
9448 {
9449 ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: false);
9450 allocate = 0;
9451 }
9452
9453 /* The stack has already been decremented by the instruction calling us
9454 so probe if the size is non-negative to preserve the protection area. */
9455 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9456 {
9457 const HOST_WIDE_INT probe_interval = get_probe_interval ();
9458
9459 if (STACK_CHECK_MOVING_SP)
9460 {
9461 if (crtl->is_leaf
9462 && !cfun->calls_alloca
9463 && allocate <= probe_interval)
9464 ;
9465
9466 else
9467 {
9468 ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: true);
9469 allocate = 0;
9470 }
9471 }
9472
9473 else
9474 {
9475 HOST_WIDE_INT size = allocate;
9476
9477 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
9478 size = 0x80000000 - get_stack_check_protect () - 1;
9479
9480 if (TARGET_STACK_PROBE)
9481 {
9482 if (crtl->is_leaf && !cfun->calls_alloca)
9483 {
9484 if (size > probe_interval)
9485 ix86_emit_probe_stack_range (first: 0, size, int_registers_saved);
9486 }
9487 else
9488 ix86_emit_probe_stack_range (first: 0,
9489 size: size + get_stack_check_protect (),
9490 int_registers_saved);
9491 }
9492 else
9493 {
9494 if (crtl->is_leaf && !cfun->calls_alloca)
9495 {
9496 if (size > probe_interval
9497 && size > get_stack_check_protect ())
9498 ix86_emit_probe_stack_range (first: get_stack_check_protect (),
9499 size: (size
9500 - get_stack_check_protect ()),
9501 int_registers_saved);
9502 }
9503 else
9504 ix86_emit_probe_stack_range (first: get_stack_check_protect (), size,
9505 int_registers_saved);
9506 }
9507 }
9508 }
9509
9510 if (allocate == 0)
9511 ;
9512 else if (!ix86_target_stack_probe ()
9513 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9514 {
9515 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9516 GEN_INT (-allocate), style: -1,
9517 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
9518 }
9519 else
9520 {
9521 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9522 rtx r10 = NULL;
9523 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
9524 bool eax_live = ix86_eax_live_at_start_p ();
9525 bool r10_live = false;
9526
9527 if (TARGET_64BIT)
9528 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9529
9530 if (eax_live)
9531 {
9532 insn = emit_insn (gen_push (arg: eax));
9533 allocate -= UNITS_PER_WORD;
9534 /* Note that SEH directives need to continue tracking the stack
9535 pointer even after the frame pointer has been set up. */
9536 if (sp_is_cfa_reg || TARGET_SEH)
9537 {
9538 if (sp_is_cfa_reg)
9539 m->fs.cfa_offset += UNITS_PER_WORD;
9540 RTX_FRAME_RELATED_P (insn) = 1;
9541 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9542 gen_rtx_SET (stack_pointer_rtx,
9543 plus_constant (Pmode,
9544 stack_pointer_rtx,
9545 -UNITS_PER_WORD)));
9546 }
9547 }
9548
9549 if (r10_live)
9550 {
9551 r10 = gen_rtx_REG (Pmode, R10_REG);
9552 insn = emit_insn (gen_push (arg: r10));
9553 allocate -= UNITS_PER_WORD;
9554 if (sp_is_cfa_reg || TARGET_SEH)
9555 {
9556 if (sp_is_cfa_reg)
9557 m->fs.cfa_offset += UNITS_PER_WORD;
9558 RTX_FRAME_RELATED_P (insn) = 1;
9559 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9560 gen_rtx_SET (stack_pointer_rtx,
9561 plus_constant (Pmode,
9562 stack_pointer_rtx,
9563 -UNITS_PER_WORD)));
9564 }
9565 }
9566
9567 emit_move_insn (eax, GEN_INT (allocate));
9568 emit_insn (gen_allocate_stack_worker_probe (Pmode, x0: eax, x1: eax));
9569
9570 /* Use the fact that AX still contains ALLOCATE. */
9571 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
9572 (Pmode, stack_pointer_rtx, stack_pointer_rtx, x2: eax));
9573
9574 if (sp_is_cfa_reg || TARGET_SEH)
9575 {
9576 if (sp_is_cfa_reg)
9577 m->fs.cfa_offset += allocate;
9578 RTX_FRAME_RELATED_P (insn) = 1;
9579 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9580 gen_rtx_SET (stack_pointer_rtx,
9581 plus_constant (Pmode, stack_pointer_rtx,
9582 -allocate)));
9583 }
9584 m->fs.sp_offset += allocate;
9585
9586 /* Use stack_pointer_rtx for relative addressing so that code works for
9587 realigned stack. But this means that we need a blockage to prevent
9588 stores based on the frame pointer from being scheduled before. */
9589 if (r10_live && eax_live)
9590 {
9591 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9592 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9593 gen_frame_mem (word_mode, t));
9594 t = plus_constant (Pmode, t, UNITS_PER_WORD);
9595 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
9596 gen_frame_mem (word_mode, t));
9597 emit_insn (gen_memory_blockage ());
9598 }
9599 else if (eax_live || r10_live)
9600 {
9601 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9602 emit_move_insn (gen_rtx_REG (word_mode,
9603 (eax_live ? AX_REG : R10_REG)),
9604 gen_frame_mem (word_mode, t));
9605 emit_insn (gen_memory_blockage ());
9606 }
9607 }
9608 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9609
  /* If we haven't already set up the frame pointer, do so now.  */
9611 if (frame_pointer_needed && !m->fs.fp_valid)
9612 {
9613 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
9614 GEN_INT (frame.stack_pointer_offset
9615 - frame.hard_frame_pointer_offset));
9616 insn = emit_insn (insn);
9617 RTX_FRAME_RELATED_P (insn) = 1;
9618 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
9619
9620 if (m->fs.cfa_reg == stack_pointer_rtx)
9621 m->fs.cfa_reg = hard_frame_pointer_rtx;
9622 m->fs.fp_offset = frame.hard_frame_pointer_offset;
9623 m->fs.fp_valid = true;
9624 }
9625
9626 if (!int_registers_saved)
9627 ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset);
9628 if (!sse_registers_saved)
9629 ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset);
9630 else if (save_stub_call_needed)
9631 ix86_emit_outlined_ms2sysv_save (frame);
9632
9633 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
9634 in PROLOGUE. */
9635 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9636 {
9637 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9638 insn = emit_insn (gen_set_got (pic));
9639 RTX_FRAME_RELATED_P (insn) = 1;
9640 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9641 emit_insn (gen_prologue_use (pic));
      /* Delete an already emitted SET_GOT if it exists and was allocated
	 to REAL_PIC_OFFSET_TABLE_REGNUM.  */
9644 ix86_elim_entry_set_got (reg: pic);
9645 }
9646
9647 if (crtl->drap_reg && !crtl->stack_realign_needed)
9648 {
9649 /* vDRAP is setup but after reload it turns out stack realign
9650 isn't necessary, here we will emit prologue to setup DRAP
9651 without stack realign adjustment */
9652 t = choose_baseaddr (cfa_offset: 0, NULL);
9653 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9654 }
9655
9656 /* Prevent instructions from being scheduled into register save push
9657 sequence when access to the redzone area is done through frame pointer.
9658 The offset between the frame pointer and the stack pointer is calculated
9659 relative to the value of the stack pointer at the end of the function
9660 prologue, and moving instructions that access redzone area via frame
9661 pointer inside push sequence violates this assumption. */
9662 if (frame_pointer_needed && frame.red_zone_size)
9663 emit_insn (gen_memory_blockage ());
9664
9665 /* SEH requires that the prologue end within 256 bytes of the start of
9666 the function. Prevent instruction schedules that would extend that.
9667 Further, prevent alloca modifications to the stack pointer from being
9668 combined with prologue modifications. */
9669 if (TARGET_SEH)
9670 emit_insn (gen_prologue_use (stack_pointer_rtx));
9671}
9672
/* Emit code to restore REG using a POP (or, when PPX_P, an APX POPP)
   insn, and update the machine frame state — the tracked stack-pointer
   offset and the CFA definition — to match, attaching the unwind notes
   the pop requires.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (arg: reg, ppx_p));

  /* Record the restore at the current offset, then account for the one
     word the pop removes from the stack.  */
  ix86_add_cfa_restore_note (insn, reg, cfa_offset: m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* The CFA is tracked relative to the stack pointer, so describe
	 the pop's implicit one-word SP increment explicitly for the
	 unwinder.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9730
/* Emit code to restore REG1 and REG2 using a single POP2 (or, when
   PPX_P, POP2P) insn, which pops two words at once, and update the
   machine frame state and CFA/unwind notes accordingly.  */
static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  /* POP2 restores two registers, so it removes two words from
     the stack.  */
  const int offset = UNITS_PER_WORD * 2;
  rtx_insn *insn;

  /* The POP2 pattern's memory operand is a TImode load through a
     post-incremented stack pointer.  */
  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));

  if (ppx_p)
    insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
  else
    insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  /* Attach an explicit restore note for each of the two registers.  */
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* The CFA follows the stack pointer, so describe the two-word
	 SP adjustment explicitly for the unwinder.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9803
9804/* Emit code to restore saved registers using POP insns. */
9805
9806static void
9807ix86_emit_restore_regs_using_pop (bool ppx_p)
9808{
9809 unsigned int regno;
9810
9811 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9812 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true))
9813 ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno), ppx_p);
9814}
9815
9816/* Emit code to restore saved registers using POP2 insns. */
9817
9818static void
9819ix86_emit_restore_regs_using_pop2 (void)
9820{
9821 int regno;
9822 int regno_list[2];
9823 regno_list[0] = regno_list[1] = -1;
9824 int loaded_regnum = 0;
9825 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
9826
9827 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9828 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true))
9829 {
9830 if (aligned)
9831 {
9832 regno_list[loaded_regnum++] = regno;
9833 if (loaded_regnum == 2)
9834 {
9835 gcc_assert (regno_list[0] != -1
9836 && regno_list[1] != -1
9837 && regno_list[0] != regno_list[1]);
9838
9839 ix86_emit_restore_reg_using_pop2 (reg1: gen_rtx_REG (word_mode,
9840 regno_list[0]),
9841 reg2: gen_rtx_REG (word_mode,
9842 regno_list[1]),
9843 TARGET_APX_PPX);
9844 loaded_regnum = 0;
9845 regno_list[0] = regno_list[1] = -1;
9846 }
9847 }
9848 else
9849 {
9850 ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno),
9851 TARGET_APX_PPX);
9852 aligned = true;
9853 }
9854 }
9855
9856 if (loaded_regnum == 1)
9857 ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno_list[0]),
9858 TARGET_APX_PPX);
9859}
9860
9861/* Emit code and notes for the LEAVE instruction. If insn is non-null,
9862 omits the emit and only attaches the notes. */
9863
9864static void
9865ix86_emit_leave (rtx_insn *insn)
9866{
9867 struct machine_function *m = cfun->machine;
9868
9869 if (!insn)
9870 insn = emit_insn (gen_leave (arg0: word_mode));
9871
9872 ix86_add_queued_cfa_restore_notes (insn);
9873
9874 gcc_assert (m->fs.fp_valid);
9875 m->fs.sp_valid = true;
9876 m->fs.sp_realigned = false;
9877 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9878 m->fs.fp_valid = false;
9879
9880 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9881 {
9882 m->fs.cfa_reg = stack_pointer_rtx;
9883 m->fs.cfa_offset = m->fs.sp_offset;
9884
9885 add_reg_note (insn, REG_CFA_DEF_CFA,
9886 plus_constant (Pmode, stack_pointer_rtx,
9887 m->fs.sp_offset));
9888 RTX_FRAME_RELATED_P (insn) = 1;
9889 }
9890 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9891 cfa_offset: m->fs.fp_offset);
9892}
9893
9894/* Emit code to restore saved registers using MOV insns.
9895 First register is restored from CFA - CFA_OFFSET. */
9896static void
9897ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9898 bool maybe_eh_return)
9899{
9900 struct machine_function *m = cfun->machine;
9901 unsigned int regno;
9902
9903 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9904 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true))
9905 {
9906
9907 /* Skip registers, already processed by shrink wrap separate. */
9908 if (!cfun->machine->reg_is_wrapped_separately[regno])
9909 {
9910 rtx reg = gen_rtx_REG (word_mode, regno);
9911 rtx mem;
9912 rtx_insn *insn;
9913
9914 mem = choose_baseaddr (cfa_offset, NULL);
9915 mem = gen_frame_mem (word_mode, mem);
9916 insn = emit_move_insn (reg, mem);
9917
9918 if (m->fs.cfa_reg == crtl->drap_reg
9919 && regno == REGNO (crtl->drap_reg))
9920 {
9921 /* Previously we'd represented the CFA as an expression
9922 like *(%ebp - 8). We've just popped that value from
9923 the stack, which means we need to reset the CFA to
9924 the drap register. This will remain until we restore
9925 the stack pointer. */
9926 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9927 RTX_FRAME_RELATED_P (insn) = 1;
9928
9929 /* DRAP register is valid for addressing. */
9930 m->fs.drap_valid = true;
9931 }
9932 else
9933 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9934 }
9935 cfa_offset -= UNITS_PER_WORD;
9936 }
9937}
9938
9939/* Emit code to restore saved registers using MOV insns.
9940 First register is restored from CFA - CFA_OFFSET. */
9941static void
9942ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9943 bool maybe_eh_return)
9944{
9945 unsigned int regno;
9946
9947 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9948 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true))
9949 {
9950 rtx reg = gen_rtx_REG (V4SFmode, regno);
9951 rtx mem;
9952 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9953
9954 mem = choose_baseaddr (cfa_offset, align: &align);
9955 mem = gen_rtx_MEM (V4SFmode, mem);
9956
9957 /* The location aligment depends upon the base register. */
9958 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9959 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9960 set_mem_align (mem, align);
9961 emit_insn (gen_rtx_SET (reg, mem));
9962
9963 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9964
9965 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9966 }
9967}
9968
/* Emit the out-of-line ms2sysv epilogue restore.  Set RSI up as the
   restore stub's base pointer and emit either a call to the stub
   (USE_CALL) or a tail jump through it, as a PARALLEL containing the
   frame loads and CFA restore notes for every clobbered register.
   STYLE is passed through to the final stack adjustment.  */
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  /* RSI's own restore is deferred to the end of the PARALLEL since the
     other frame loads address memory through RSI.  */
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (cfa_offset: rsi_offset, align: &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  /* Size the PARALLEL: one frame load per register, plus the stub USE,
     and for the tail-call forms the return, SP restore and (with a
     frame pointer) FP restore/clobber elements.  */
  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  /* The stub performs a leave: SP = RBP + 8, RBP = [RBP].  */
	  tmp = plus_constant (DImode, rbp, 8);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, frame_reg: rsi, offset: r.offset);

      /* Save RSI frame load insn & note to add last.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset: r.offset);
	}
    }

  /* Add RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, reg: gen_rtx_REG (DImode, SI_REG),
			     cfa_offset: rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
	ix86_emit_leave (insn);
      else
	{
	  /* Need CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (dealloc), style,
				 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
    }
}
10127
/* Restore function stack, frame, and registers.

   STYLE selects the epilogue variant: 0 expands a sibcall epilogue
   (no return instruction is emitted and the frame state is rolled
   back before returning), 2 expands a __builtin_eh_return epilogue,
   and any other value expands a normal return.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  /* Snapshot the frame state recorded by the prologue so it can be
     restored at the end; a function may expand several epilogues.  */
  struct machine_frame_state frame_state_save = m->fs;
  bool restore_regs_via_mov;
  bool using_drap;
  bool restore_stub_is_tail = false;

  if (ix86_function_naked (fn: current_function_decl))
    {
      /* The program should not reach this point.  */
      emit_insn (gen_ud2 ());
      return;
    }

  ix86_finalize_stack_frame_flags ();
  const struct ix86_frame &frame = cfun->machine->frame;

  /* Re-derive the validity of SP as a frame pointer for this epilogue;
     shrink-wrapping may have left it in a different state than the
     prologue did.  */
  m->fs.sp_realigned = stack_realign_fp;
  m->fs.sp_valid = stack_realign_fp
		   || !frame_pointer_needed
		   || crtl->sp_is_unchanging;
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below return address and error code in
	 exception handler.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    reg_save_offset -= 2 * UNITS_PER_WORD;

  /* Decide whether to restore the call-saved integer registers with
     moves or with pops.  EH_RETURN requires the use of moves to
     function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If registers were already saved with PPX-prefixed pushes (APX),
     restore them with the matching pops, not with moves.  */
  else if (m->fs.apx_ppx_used)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp cannot be used then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.  */
  else if (!sp_valid_at (cfa_offset: frame.hfp_save_offset) && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (crtl->shrink_wrapped_separate
	   || (TARGET_EPILOGUE_USING_MOVE
	       && cfun->machine->use_fast_prologue_epilogue
	       && (frame.nregs > 1
		   || m->fs.sp_offset != reg_save_offset)))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  /* Separately shrink-wrapped components are restored by their own
     sequences, which only works with move-based restores.  */
  if (crtl->shrink_wrapped_separate)
    gcc_assert (restore_regs_via_mov);

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore SSE regs via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && sp_valid_at (cfa_offset: frame.stack_realign_offset + 1)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset,
					  maybe_eh_return: style == 2);

  if (m->call_ms2sysv)
    {
      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;

      /* We cannot use a tail-call for the stub if:
	 1. We have to pop incoming args,
	 2. We have additional int regs to restore, or
	 3. A sibling call will be the tail-call, or
	 4. We are emitting an eh_return_internal epilogue.

	 TODO: Item 4 has not yet been tested!

	 If any of the above are true, we will call the stub rather than
	 jump to it.  */
      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
      ix86_emit_outlined_ms2sysv_restore (frame, use_call: !restore_stub_is_tail, style);
    }

  /* If using out-of-line stub that is a tail-call, then...  */
  if (m->call_ms2sysv && restore_stub_is_tail)
    {
      /* TODO: paranoid tests.  (remove eventually)  */
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);
      gcc_assert (!m->fs.fp_valid);
      gcc_assert (!m->fs.realigned);
      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
      gcc_assert (!crtl->drap_reg);
      gcc_assert (!frame.nregs);
      gcc_assert (!crtl->shrink_wrapped_separate);
    }
  else if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (cfa_offset: reg_save_offset, maybe_eh_return: style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx sa = EH_RETURN_STACKADJ_RTX;
	  rtx_insn *insn;

	  gcc_assert (!crtl->shrink_wrapped_separate);

	  /* Stack realignment doesn't work with eh_return.  */
	  if (crtl->stack_realign_needed)
	    sorry ("Stack realignment not supported with "
		   "%<__builtin_eh_return%>");

	  /* regparm nested functions don't work with eh_return.  */
	  if (ix86_static_chain_on_stack)
	    sorry ("regparm nested function not supported with "
		   "%<__builtin_eh_return%>");

	  if (frame_pointer_needed)
	    {
	      /* SA = FP + fp_offset - word: address just below the
		 saved frame pointer, adjusted by the EH stack
		 adjustment register.  */
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (sa, t));

	      /* NB: eh_return epilogues must restore the frame pointer
		 in word_mode since the upper 32 bits of RBP register
		 can have any values.  */
	      t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
	      rtx frame_reg = gen_rtx_REG (word_mode,
					   HARD_FRAME_POINTER_REGNUM);
	      insn = emit_move_insn (frame_reg, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn, unless the control flow instrumentation
		 is done.  In this case the SP is used later and we have
		 to reset CFA to SP.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (Pmode, sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, src: sa,
					 const0_rtx, style,
					 flag_cf_protection);
	    }
	  else
	    {
	      /* No frame pointer: add SA directly into SP.  */
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (Pmode, stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	  m->fs.sp_realigned = false;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test are
	     set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  If the stack pointer was realigned, it needs
	 to be restored now.  Also do it on SEH target for very large
	 frame as the emitted instructions aren't allowed by the ABI
	 in epilogues.  */
      if (!m->fs.sp_valid || m->fs.sp_realigned
	  || (TARGET_SEH
	      && (m->fs.sp_offset - reg_save_offset
		  >= SEH_MAX_FRAME_SIZE)))
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - reg_save_offset),
				     style, set_cfa: false);
	}
      else if (m->fs.sp_offset != reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - reg_save_offset),
				     style,
				     set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
	}

      /* Second step: pop the call-saved integer registers, pairing
	 them with POP2 when APX makes that possible.  */
      if (TARGET_APX_PUSH2POP2
	  && ix86_can_use_push2pop2 ()
	  && m->func_type == TYPE_NORMAL)
	ix86_emit_restore_regs_using_pop2 ();
      else
	ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
    }

  /* If we used a stack pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (sp_valid_at (cfa_offset: frame.hfp_save_offset)
	  && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave (NULL);
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, set_cfa: !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }

  if (using_drap)
    {
      /* Recover the incoming stack pointer from the DRAP register and
	 restore the DRAP register itself if it was call-saved.  */
      int param_ptr_offset = UNITS_PER_WORD;
      rtx_insn *insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(stack_pointer_rtx,
			 plus_constant (Pmode, crtl->drap_reg,
					-param_ptr_offset)));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   param_ptr_offset));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.sp_realigned);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, set_cfa: true);
    }
  else
    ix86_add_queued_cfa_restore_notes (insn: get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (cfun->machine->func_type != TYPE_NORMAL)
    emit_jump_insn (gen_interrupt_return ());
  else if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx_insn *insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (arg: ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  /* The pop of the return address into %ecx raises SP by one
	     word; record that for the unwinder along with the fact
	     that %ecx now holds the return address.  */
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     offset: popc, style: -1, set_cfa: true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else if (!m->call_ms2sysv || !restore_stub_is_tail)
    {
      /* In case of return from EH a simple return cannot be used
	 as a return address will be compared with a shadow stack
	 return address.  Use indirect jump instead.  */
      if (style == 2 && flag_cf_protection)
	{
	  /* Register used in indirect jump must be in word_mode.  But
	     Pmode may not be the same as word_mode for x32.  */
	  rtx ecx = gen_rtx_REG (word_mode, CX_REG);
	  rtx_insn *insn;

	  insn = emit_insn (gen_pop (arg: ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_internal ());
    }

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
10568
/* Reset from the function's potential modifications.  Runs once the
   assembly for a function has been emitted (presumably as the
   TARGET_ASM_FUNCTION_EPILOGUE hook -- confirm against the hook
   table), and additionally patches up Mach-O end-of-function
   quirks.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  /* Put the hard PIC register number back in case the function body
     changed it.  */
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
	 it looks like we might want one, take special action.
	 First, collect any sequence of deleted debug labels.  */
      while (insn
	     && NOTE_P (insn)
	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
	{
	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	     notes only, instead set their CODE_LABEL_NUMBER to -1,
	     otherwise there would be code generation differences
	     in between -g and -g0.  */
	  if (NOTE_P (insn) && NOTE_KIND (insn)
	      == NOTE_INSN_DELETED_DEBUG_LABEL)
	    deleted_debug_label = insn;
	  insn = PREV_INSN (insn);
	}

      /* If we have:
	 label:
	    barrier
	 then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
	insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
	{
	  if (LABEL_P (insn)
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* Trailing label: pad with a nop so the label is not at
	       the very end of the object.  */
	    fputs (s: "\tnop\n", stream: file);
	  else if (cfun && ! cfun->is_thunk)
	    {
	      /* See if we have a completely empty function body, skipping
	         the special case of the picbase thunk emitted as asm.  */
	      while (insn && ! INSN_P (insn))
		insn = PREV_INSN (insn);
	      /* If we don't find any insns, we've got an empty function body;
		 I.e. completely empty - without a return or branch.  This is
		 taken as the case where a function body has been removed
		 because it contains an inline __builtin_unreachable().  GCC
		 declares that reaching __builtin_unreachable() means UB so
		 we're not obliged to do anything special; however, we want
		 non-zero-sized function bodies.  To meet this, and help the
		 user out, let's trap the case.  */
	      if (insn == NULL)
		fputs (s: "\tud2\n", stream: file);
	    }
	}
      else if (deleted_debug_label)
	/* Only notes/barriers remained: renumber the deleted debug
	   labels so that -g and -g0 produce identical code.  */
	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	    CODE_LABEL_NUMBER (insn) = -1;
    }
}
10640
10641/* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10642
10643void
10644ix86_print_patchable_function_entry (FILE *file,
10645 unsigned HOST_WIDE_INT patch_area_size,
10646 bool record_p)
10647{
10648 if (cfun->machine->function_label_emitted)
10649 {
10650 /* NB: When ix86_print_patchable_function_entry is called after
10651 function table has been emitted, we have inserted or queued
10652 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10653 place. There is nothing to do here. */
10654 return;
10655 }
10656
10657 default_print_patchable_function_entry (file, patch_area_size,
10658 record_p);
10659}
10660
10661/* Output patchable area. NB: default_print_patchable_function_entry
10662 isn't available in i386.md. */
10663
10664void
10665ix86_output_patchable_area (unsigned int patch_area_size,
10666 bool record_p)
10667{
10668 default_print_patchable_function_entry (asm_out_file,
10669 patch_area_size,
10670 record_p);
10671}
10672
10673/* Return a scratch register to use in the split stack prologue. The
10674 split stack prologue is used for -fsplit-stack. It is the first
10675 instructions in the function, even before the regular prologue.
10676 The scratch register can be any caller-saved register which is not
10677 used for parameters or for the static chain. */
10678
10679static unsigned int
10680split_stack_prologue_scratch_regno (void)
10681{
10682 if (TARGET_64BIT)
10683 return R11_REG;
10684 else
10685 {
10686 bool is_fastcall, is_thiscall;
10687 int regparm;
10688
10689 is_fastcall = (lookup_attribute (attr_name: "fastcall",
10690 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10691 != NULL);
10692 is_thiscall = (lookup_attribute (attr_name: "thiscall",
10693 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10694 != NULL);
10695 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10696
10697 if (is_fastcall)
10698 {
10699 if (DECL_STATIC_CHAIN (cfun->decl))
10700 {
10701 sorry ("%<-fsplit-stack%> does not support fastcall with "
10702 "nested function");
10703 return INVALID_REGNUM;
10704 }
10705 return AX_REG;
10706 }
10707 else if (is_thiscall)
10708 {
10709 if (!DECL_STATIC_CHAIN (cfun->decl))
10710 return DX_REG;
10711 return AX_REG;
10712 }
10713 else if (regparm < 3)
10714 {
10715 if (!DECL_STATIC_CHAIN (cfun->decl))
10716 return CX_REG;
10717 else
10718 {
10719 if (regparm >= 2)
10720 {
10721 sorry ("%<-fsplit-stack%> does not support 2 register "
10722 "parameters for a nested function");
10723 return INVALID_REGNUM;
10724 }
10725 return DX_REG;
10726 }
10727 }
10728 else
10729 {
10730 /* FIXME: We could make this work by pushing a register
10731 around the addition and comparison. */
10732 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10733 return INVALID_REGNUM;
10734 }
10735 }
10736}
10737
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  Created lazily on first use; GTY(()) roots it for
   the garbage collector.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large model.  */

static GTY(()) rtx split_stack_fn_large;
10746
10747/* Return location of the stack guard value in the TLS block. */
10748
10749rtx
10750ix86_split_stack_guard (void)
10751{
10752 int offset;
10753 addr_space_t as = DEFAULT_TLS_SEG_REG;
10754 rtx r;
10755
10756 gcc_assert (flag_split_stack);
10757
10758#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10759 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10760#else
10761 gcc_unreachable ();
10762#endif
10763
10764 r = GEN_INT (offset);
10765 r = gen_const_mem (Pmode, r);
10766 set_mem_addr_space (r, as);
10767
10768 return r;
10769}
10770
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  The emitted sequence
   compares the available stack against the guard in the TCB and, if
   short, calls __morestack (or __morestack_large_model) to obtain a
   new stack segment before falling through to the normal prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  unsigned int scratch_regno = INVALID_REGNUM;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  /* Total stack the regular prologue will need.  */
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  if (allocate >= SPLIT_STACK_AVAILABLE
      || flag_force_indirect_call)
    {
      scratch_regno = split_stack_prologue_scratch_regno ();
      /* split_stack_prologue_scratch_regno has already issued a
	 "sorry" diagnostic when no scratch register is available.  */
      if (scratch_regno == INVALID_REGNUM)
	return;
    }

  if (allocate >= SPLIT_STACK_AVAILABLE)
    {
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);

      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use gen_add in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  /* The displacement doesn't fit in an immediate; materialize
	     it in the scratch register first.  */
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
	}
      current = scratch_reg;
    }
  else
    current = stack_pointer_rtx;

  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
  call_fusage = NULL_RTX;
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (DImode, R10_REG);
      reg11 = gen_rtx_REG (DImode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (fusage: &call_fusage, reg: rax);
	}

      if (flag_force_indirect_call
	  || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  if (split_stack_fn_large == NULL_RTX)
	    {
	      split_stack_fn_large
		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
	    }

	  fn = split_stack_fn_large;

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      /* Compute the GOT address into %r10+%r11 and load the
		 function address from its GOT slot.  */
	      rtx_code_label *label;
	      rtx x;

	      gcc_assert (Pmode == DImode);

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (gen_add2_insn (reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      fn = copy_to_suggested_reg (x, reg11, Pmode);
	    }
	  else if (ix86_cmodel == CM_LARGE)
	    fn = copy_to_suggested_reg (fn, reg11, Pmode);

	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (fusage: &call_fusage, reg: reg11);
	}

      use_reg (fusage: &call_fusage, reg: reg10);
    }
  else
    {
      if (flag_force_indirect_call && flag_pic)
	{
	  /* Load __morestack's address from the GOT so the call can
	     be made through a register.  */
	  rtx x;

	  gcc_assert (Pmode == SImode);

	  scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

	  emit_insn (gen_set_got (scratch_reg));
	  x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
			      UNSPEC_GOT);
	  x = gen_rtx_CONST (Pmode, x);
	  x = gen_rtx_PLUS (Pmode, scratch_reg, x);
	  x = gen_const_mem (Pmode, x);
	  fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
	}

      /* 32-bit: push the two arguments; __morestack pops them.  */
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (arg: allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }

  if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
    {
      scratch_reg = gen_rtx_REG (word_mode, scratch_regno);

      if (GET_MODE (fn) != word_mode)
	fn = gen_rtx_ZERO_EXTEND (word_mode, fn);

      fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
    }

  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via indirect
	 call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.
      */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, frame_reg,
					     words * UNITS_PER_WORD)));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, stack_pointer_rtx,
					     UNITS_PER_WORD)));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
11073
11074/* We may have to tell the dataflow pass that the split stack prologue
11075 is initializing a scratch register. */
11076
11077static void
11078ix86_live_on_entry (bitmap regs)
11079{
11080 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11081 {
11082 gcc_assert (flag_split_stack);
11083 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11084 }
11085}
11086
11087/* Extract the parts of an RTL expression that is a valid memory address
11088 for an instruction. Return false if the structure of the address is
11089 grossly off. */
11090
11091bool
11092ix86_decompose_address (rtx addr, struct ix86_address *out)
11093{
11094 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11095 rtx base_reg, index_reg;
11096 HOST_WIDE_INT scale = 1;
11097 rtx scale_rtx = NULL_RTX;
11098 rtx tmp;
11099 addr_space_t seg = ADDR_SPACE_GENERIC;
11100
11101 /* Allow zero-extended SImode addresses,
11102 they will be emitted with addr32 prefix. */
11103 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11104 {
11105 if (GET_CODE (addr) == ZERO_EXTEND
11106 && GET_MODE (XEXP (addr, 0)) == SImode)
11107 {
11108 addr = XEXP (addr, 0);
11109 if (CONST_INT_P (addr))
11110 return false;
11111 }
11112 else if (GET_CODE (addr) == AND)
11113 {
11114 rtx mask = XEXP (addr, 1);
11115 rtx shift_val;
11116
11117 if (const_32bit_mask (mask, DImode)
11118 /* For ASHIFT inside AND, combine will not generate
11119 canonical zero-extend. Merge mask for AND and shift_count
11120 to check if it is canonical zero-extend. */
11121 || (CONST_INT_P (mask)
11122 && GET_CODE (XEXP (addr, 0)) == ASHIFT
11123 && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
11124 && ((UINTVAL (mask)
11125 | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
11126 == HOST_WIDE_INT_UC (0xffffffff))))
11127 {
11128 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
11129 if (addr == NULL_RTX)
11130 return false;
11131
11132 if (CONST_INT_P (addr))
11133 return false;
11134 }
11135 }
11136 }
11137
11138 /* Allow SImode subregs of DImode addresses,
11139 they will be emitted with addr32 prefix. */
11140 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11141 {
11142 if (SUBREG_P (addr)
11143 && GET_MODE (SUBREG_REG (addr)) == DImode)
11144 {
11145 addr = SUBREG_REG (addr);
11146 if (CONST_INT_P (addr))
11147 return false;
11148 }
11149 }
11150
11151 if (REG_P (addr))
11152 base = addr;
11153 else if (SUBREG_P (addr))
11154 {
11155 if (REG_P (SUBREG_REG (addr)))
11156 base = addr;
11157 else
11158 return false;
11159 }
11160 else if (GET_CODE (addr) == PLUS)
11161 {
11162 rtx addends[4], op;
11163 int n = 0, i;
11164
11165 op = addr;
11166 do
11167 {
11168 if (n >= 4)
11169 return false;
11170 addends[n++] = XEXP (op, 1);
11171 op = XEXP (op, 0);
11172 }
11173 while (GET_CODE (op) == PLUS);
11174 if (n >= 4)
11175 return false;
11176 addends[n] = op;
11177
11178 for (i = n; i >= 0; --i)
11179 {
11180 op = addends[i];
11181 switch (GET_CODE (op))
11182 {
11183 case MULT:
11184 if (index)
11185 return false;
11186 index = XEXP (op, 0);
11187 scale_rtx = XEXP (op, 1);
11188 break;
11189
11190 case ASHIFT:
11191 if (index)
11192 return false;
11193 index = XEXP (op, 0);
11194 tmp = XEXP (op, 1);
11195 if (!CONST_INT_P (tmp))
11196 return false;
11197 scale = INTVAL (tmp);
11198 if ((unsigned HOST_WIDE_INT) scale > 3)
11199 return false;
11200 scale = 1 << scale;
11201 break;
11202
11203 case ZERO_EXTEND:
11204 op = XEXP (op, 0);
11205 if (GET_CODE (op) != UNSPEC)
11206 return false;
11207 /* FALLTHRU */
11208
11209 case UNSPEC:
11210 if (XINT (op, 1) == UNSPEC_TP
11211 && TARGET_TLS_DIRECT_SEG_REFS
11212 && seg == ADDR_SPACE_GENERIC)
11213 seg = DEFAULT_TLS_SEG_REG;
11214 else
11215 return false;
11216 break;
11217
11218 case SUBREG:
11219 if (!REG_P (SUBREG_REG (op)))
11220 return false;
11221 /* FALLTHRU */
11222
11223 case REG:
11224 if (!base)
11225 base = op;
11226 else if (!index)
11227 index = op;
11228 else
11229 return false;
11230 break;
11231
11232 case CONST:
11233 case CONST_INT:
11234 case SYMBOL_REF:
11235 case LABEL_REF:
11236 if (disp)
11237 return false;
11238 disp = op;
11239 break;
11240
11241 default:
11242 return false;
11243 }
11244 }
11245 }
11246 else if (GET_CODE (addr) == MULT)
11247 {
11248 index = XEXP (addr, 0); /* index*scale */
11249 scale_rtx = XEXP (addr, 1);
11250 }
11251 else if (GET_CODE (addr) == ASHIFT)
11252 {
11253 /* We're called for lea too, which implements ashift on occasion. */
11254 index = XEXP (addr, 0);
11255 tmp = XEXP (addr, 1);
11256 if (!CONST_INT_P (tmp))
11257 return false;
11258 scale = INTVAL (tmp);
11259 if ((unsigned HOST_WIDE_INT) scale > 3)
11260 return false;
11261 scale = 1 << scale;
11262 }
11263 else
11264 disp = addr; /* displacement */
11265
11266 if (index)
11267 {
11268 if (REG_P (index))
11269 ;
11270 else if (SUBREG_P (index)
11271 && REG_P (SUBREG_REG (index)))
11272 ;
11273 else
11274 return false;
11275 }
11276
11277 /* Extract the integral value of scale. */
11278 if (scale_rtx)
11279 {
11280 if (!CONST_INT_P (scale_rtx))
11281 return false;
11282 scale = INTVAL (scale_rtx);
11283 }
11284
11285 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
11286 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
11287
11288 /* Avoid useless 0 displacement. */
11289 if (disp == const0_rtx && (base || index))
11290 disp = NULL_RTX;
11291
11292 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11293 if (base_reg && index_reg && scale == 1
11294 && (REGNO (index_reg) == ARG_POINTER_REGNUM
11295 || REGNO (index_reg) == FRAME_POINTER_REGNUM
11296 || REGNO (index_reg) == SP_REG))
11297 {
11298 std::swap (a&: base, b&: index);
11299 std::swap (a&: base_reg, b&: index_reg);
11300 }
11301
11302 /* Special case: %ebp cannot be encoded as a base without a displacement.
11303 Similarly %r13. */
11304 if (!disp && base_reg
11305 && (REGNO (base_reg) == ARG_POINTER_REGNUM
11306 || REGNO (base_reg) == FRAME_POINTER_REGNUM
11307 || REGNO (base_reg) == BP_REG
11308 || REGNO (base_reg) == R13_REG))
11309 disp = const0_rtx;
11310
11311 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11312 Avoid this by transforming to [%esi+0].
11313 Reload calls address legitimization without cfun defined, so we need
11314 to test cfun for being non-NULL. */
11315 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
11316 && base_reg && !index_reg && !disp
11317 && REGNO (base_reg) == SI_REG)
11318 disp = const0_rtx;
11319
11320 /* Special case: encode reg+reg instead of reg*2. */
11321 if (!base && index && scale == 2)
11322 base = index, base_reg = index_reg, scale = 1;
11323
11324 /* Special case: scaling cannot be encoded without base or displacement. */
11325 if (!base && !disp && index && scale != 1)
11326 disp = const0_rtx;
11327
11328 out->base = base;
11329 out->index = index;
11330 out->disp = disp;
11331 out->scale = scale;
11332 out->seg = seg;
11333
11334 return true;
11335}
11336
11337/* Return cost of the memory address x.
11338 For i386, it is better to use a complex address than let gcc copy
11339 the address into a reg and make a new pseudo. But not if the address
11340 requires to two regs - that would mean more pseudos with longer
11341 lifetimes. */
11342static int
11343ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
11344{
11345 struct ix86_address parts;
11346 int cost = 1;
11347 int ok = ix86_decompose_address (addr: x, out: &parts);
11348
11349 gcc_assert (ok);
11350
11351 if (parts.base && SUBREG_P (parts.base))
11352 parts.base = SUBREG_REG (parts.base);
11353 if (parts.index && SUBREG_P (parts.index))
11354 parts.index = SUBREG_REG (parts.index);
11355
11356 /* Attempt to minimize number of registers in the address by increasing
11357 address cost for each used register. We don't increase address cost
11358 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
11359 is not invariant itself it most likely means that base or index is not
11360 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
11361 which is not profitable for x86. */
11362 if (parts.base
11363 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11364 && (current_pass->type == GIMPLE_PASS
11365 || !pic_offset_table_rtx
11366 || !REG_P (parts.base)
11367 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
11368 cost++;
11369
11370 if (parts.index
11371 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11372 && (current_pass->type == GIMPLE_PASS
11373 || !pic_offset_table_rtx
11374 || !REG_P (parts.index)
11375 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
11376 cost++;
11377
11378 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11379 since it's predecode logic can't detect the length of instructions
11380 and it degenerates to vector decoded. Increase cost of such
11381 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11382 to split such addresses or even refuse such addresses at all.
11383
11384 Following addressing modes are affected:
11385 [base+scale*index]
11386 [scale*index+disp]
11387 [base+index]
11388
11389 The first and last case may be avoidable by explicitly coding the zero in
11390 memory address, but I don't have AMD-K6 machine handy to check this
11391 theory. */
11392
11393 if (TARGET_CPU_P (K6)
11394 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11395 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11396 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11397 cost += 10;
11398
11399 return cost;
11400}
11401
11402/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
11403
11404bool
11405ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
11406 unsigned int align,
11407 enum by_pieces_operation op,
11408 bool speed_p)
11409{
11410 /* Return true when we are currently expanding memcpy/memset epilogue
11411 with move_by_pieces or store_by_pieces. */
11412 if (cfun->machine->by_pieces_in_use)
11413 return true;
11414
11415 return default_use_by_pieces_infrastructure_p (size, align, op,
11416 speed_p);
11417}
11418
11419/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11420 this is used for to form addresses to local data when -fPIC is in
11421 use. */
11422
11423static bool
11424darwin_local_data_pic (rtx disp)
11425{
11426 return (GET_CODE (disp) == UNSPEC
11427 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11428}
11429
11430/* True if the function symbol operand X should be loaded from GOT.
11431 If CALL_P is true, X is a call operand.
11432
11433 NB: -mno-direct-extern-access doesn't force load from GOT for
11434 call.
11435
11436 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11437 statements, since a PIC register could not be available at the
11438 call site. */
11439
11440bool
11441ix86_force_load_from_GOT_p (rtx x, bool call_p)
11442{
11443 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11444 && !TARGET_PECOFF && !TARGET_MACHO
11445 && (!flag_pic || this_is_asm_operands)
11446 && ix86_cmodel != CM_LARGE
11447 && ix86_cmodel != CM_LARGE_PIC
11448 && SYMBOL_REF_P (x)
11449 && ((!call_p
11450 && (!ix86_direct_extern_access
11451 || (SYMBOL_REF_DECL (x)
11452 && lookup_attribute (attr_name: "nodirect_extern_access",
11453 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11454 || (SYMBOL_REF_FUNCTION_P (x)
11455 && (!flag_plt
11456 || (SYMBOL_REF_DECL (x)
11457 && lookup_attribute (attr_name: "noplt",
11458 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11459 && !SYMBOL_REF_LOCAL_P (x));
11460}
11461
11462/* Determine if a given RTX is a valid constant. We already know this
11463 satisfies CONSTANT_P. */
11464
11465static bool
11466ix86_legitimate_constant_p (machine_mode mode, rtx x)
11467{
11468 switch (GET_CODE (x))
11469 {
11470 case CONST:
11471 x = XEXP (x, 0);
11472
11473 if (GET_CODE (x) == PLUS)
11474 {
11475 if (!CONST_INT_P (XEXP (x, 1)))
11476 return false;
11477 x = XEXP (x, 0);
11478 }
11479
11480 if (TARGET_MACHO && darwin_local_data_pic (disp: x))
11481 return true;
11482
11483 /* Only some unspecs are valid as "constants". */
11484 if (GET_CODE (x) == UNSPEC)
11485 switch (XINT (x, 1))
11486 {
11487 case UNSPEC_GOT:
11488 case UNSPEC_GOTOFF:
11489 case UNSPEC_PLTOFF:
11490 return TARGET_64BIT;
11491 case UNSPEC_TPOFF:
11492 case UNSPEC_NTPOFF:
11493 x = XVECEXP (x, 0, 0);
11494 return (SYMBOL_REF_P (x)
11495 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11496 case UNSPEC_DTPOFF:
11497 x = XVECEXP (x, 0, 0);
11498 return (SYMBOL_REF_P (x)
11499 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11500 case UNSPEC_SECREL32:
11501 x = XVECEXP (x, 0, 0);
11502 return SYMBOL_REF_P (x);
11503 default:
11504 return false;
11505 }
11506
11507 /* We must have drilled down to a symbol. */
11508 if (LABEL_REF_P (x))
11509 return true;
11510 if (!SYMBOL_REF_P (x))
11511 return false;
11512 /* FALLTHRU */
11513
11514 case SYMBOL_REF:
11515 /* TLS symbols are never valid. */
11516 if (SYMBOL_REF_TLS_MODEL (x))
11517 return false;
11518
11519 /* DLLIMPORT symbols are never valid. */
11520 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11521 && SYMBOL_REF_DLLIMPORT_P (x))
11522 return false;
11523
11524#if TARGET_MACHO
11525 /* mdynamic-no-pic */
11526 if (MACHO_DYNAMIC_NO_PIC_P)
11527 return machopic_symbol_defined_p (x);
11528#endif
11529
11530 /* External function address should be loaded
11531 via the GOT slot to avoid PLT. */
11532 if (ix86_force_load_from_GOT_p (x))
11533 return false;
11534
11535 break;
11536
11537 CASE_CONST_SCALAR_INT:
11538 if (ix86_endbr_immediate_operand (x, VOIDmode))
11539 return false;
11540
11541 switch (mode)
11542 {
11543 case E_TImode:
11544 if (TARGET_64BIT)
11545 return true;
11546 /* FALLTHRU */
11547 case E_OImode:
11548 case E_XImode:
11549 if (!standard_sse_constant_p (x, pred_mode: mode)
11550 && GET_MODE_SIZE (TARGET_AVX512F
11551 ? XImode
11552 : (TARGET_AVX
11553 ? OImode
11554 : (TARGET_SSE2
11555 ? TImode : DImode))) < GET_MODE_SIZE (mode))
11556 return false;
11557 default:
11558 break;
11559 }
11560 break;
11561
11562 case CONST_VECTOR:
11563 if (!standard_sse_constant_p (x, pred_mode: mode))
11564 return false;
11565 break;
11566
11567 case CONST_DOUBLE:
11568 if (mode == E_BFmode)
11569 return false;
11570
11571 default:
11572 break;
11573 }
11574
11575 /* Otherwise we handle everything else in the move patterns. */
11576 return true;
11577}
11578
11579/* Determine if it's legal to put X into the constant pool. This
11580 is not possible for the address of thread-local symbols, which
11581 is checked above. */
11582
11583static bool
11584ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11585{
11586 /* We can put any immediate constant in memory. */
11587 switch (GET_CODE (x))
11588 {
11589 CASE_CONST_ANY:
11590 return false;
11591
11592 default:
11593 break;
11594 }
11595
11596 return !ix86_legitimate_constant_p (mode, x);
11597}
11598
11599/* Return a unique alias set for the GOT. */
11600
11601alias_set_type
11602ix86_GOT_alias_set (void)
11603{
11604 static alias_set_type set = -1;
11605 if (set == -1)
11606 set = new_alias_set ();
11607 return set;
11608}
11609
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      /* Strip the CONST wrapper and an optional constant offset to get
	 at the underlying symbol or unspec.  */
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    /* Local-exec TLS offsets are valid only for symbols with
	       that TLS model.  */
	    x = XVECEXP (inner, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_SECREL32:
	    x = XVECEXP (inner, 0, 0);
	    return SYMBOL_REF_P (x);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* A CONST wrapping a plain symbol (+ offset) is validated the
	 same way as a bare SYMBOL_REF/LABEL_REF below.  */
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      /* Any other constant (e.g. CONST_INT) is always fine.  */
      return true;
    }
}
11657
11658/* Determine if a given CONST RTX is a valid memory displacement
11659 in PIC mode. */
11660
11661bool
11662legitimate_pic_address_disp_p (rtx disp)
11663{
11664 bool saw_plus;
11665
11666 /* In 64bit mode we can allow direct addresses of symbols and labels
11667 when they are not dynamic symbols. */
11668 if (TARGET_64BIT)
11669 {
11670 rtx op0 = disp, op1;
11671
11672 switch (GET_CODE (disp))
11673 {
11674 case LABEL_REF:
11675 return true;
11676
11677 case CONST:
11678 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11679 break;
11680 op0 = XEXP (XEXP (disp, 0), 0);
11681 op1 = XEXP (XEXP (disp, 0), 1);
11682 if (!CONST_INT_P (op1))
11683 break;
11684 if (GET_CODE (op0) == UNSPEC
11685 && (XINT (op0, 1) == UNSPEC_DTPOFF
11686 || XINT (op0, 1) == UNSPEC_NTPOFF)
11687 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11688 return true;
11689 if (INTVAL (op1) >= 16*1024*1024
11690 || INTVAL (op1) < -16*1024*1024)
11691 break;
11692 if (LABEL_REF_P (op0))
11693 return true;
11694 if (GET_CODE (op0) == CONST
11695 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11696 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11697 return true;
11698 if (GET_CODE (op0) == UNSPEC
11699 && XINT (op0, 1) == UNSPEC_PCREL)
11700 return true;
11701 if (!SYMBOL_REF_P (op0))
11702 break;
11703 /* FALLTHRU */
11704
11705 case SYMBOL_REF:
11706 /* TLS references should always be enclosed in UNSPEC.
11707 The dllimported symbol needs always to be resolved. */
11708 if (SYMBOL_REF_TLS_MODEL (op0)
11709 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11710 return false;
11711
11712 if (TARGET_PECOFF)
11713 {
11714#if TARGET_PECOFF
11715 if (is_imported_p (op0))
11716 return true;
11717#endif
11718
11719 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11720 break;
11721
11722 /* Non-external-weak function symbols need to be resolved only
11723 for the large model. Non-external symbols don't need to be
11724 resolved for large and medium models. For the small model,
11725 we don't need to resolve anything here. */
11726 if ((ix86_cmodel != CM_LARGE_PIC
11727 && SYMBOL_REF_FUNCTION_P (op0)
11728 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11729 || !SYMBOL_REF_EXTERNAL_P (op0)
11730 || ix86_cmodel == CM_SMALL_PIC)
11731 return true;
11732 }
11733 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11734 && (SYMBOL_REF_LOCAL_P (op0)
11735 || ((ix86_direct_extern_access
11736 && !(SYMBOL_REF_DECL (op0)
11737 && lookup_attribute (attr_name: "nodirect_extern_access",
11738 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11739 && HAVE_LD_PIE_COPYRELOC
11740 && flag_pie
11741 && !SYMBOL_REF_WEAK (op0)
11742 && !SYMBOL_REF_FUNCTION_P (op0)))
11743 && ix86_cmodel != CM_LARGE_PIC)
11744 return true;
11745 break;
11746
11747 default:
11748 break;
11749 }
11750 }
11751 if (GET_CODE (disp) != CONST)
11752 return false;
11753 disp = XEXP (disp, 0);
11754
11755 if (TARGET_64BIT)
11756 {
11757 /* We are unsafe to allow PLUS expressions. This limit allowed distance
11758 of GOT tables. We should not need these anyway. */
11759 if (GET_CODE (disp) != UNSPEC
11760 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11761 && XINT (disp, 1) != UNSPEC_GOTOFF
11762 && XINT (disp, 1) != UNSPEC_PCREL
11763 && XINT (disp, 1) != UNSPEC_PLTOFF))
11764 return false;
11765
11766 if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11767 && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
11768 return false;
11769 return true;
11770 }
11771
11772 saw_plus = false;
11773 if (GET_CODE (disp) == PLUS)
11774 {
11775 if (!CONST_INT_P (XEXP (disp, 1)))
11776 return false;
11777 disp = XEXP (disp, 0);
11778 saw_plus = true;
11779 }
11780
11781 if (TARGET_MACHO && darwin_local_data_pic (disp))
11782 return true;
11783
11784 if (GET_CODE (disp) != UNSPEC)
11785 return false;
11786
11787 switch (XINT (disp, 1))
11788 {
11789 case UNSPEC_GOT:
11790 if (saw_plus)
11791 return false;
11792 /* We need to check for both symbols and labels because VxWorks loads
11793 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11794 details. */
11795 return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11796 || LABEL_REF_P (XVECEXP (disp, 0, 0)));
11797 case UNSPEC_GOTOFF:
11798 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11799 While ABI specify also 32bit relocation but we don't produce it in
11800 small PIC model at all. */
11801 if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
11802 || LABEL_REF_P (XVECEXP (disp, 0, 0)))
11803 && !TARGET_64BIT)
11804 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11805 return false;
11806 case UNSPEC_GOTTPOFF:
11807 case UNSPEC_GOTNTPOFF:
11808 case UNSPEC_INDNTPOFF:
11809 if (saw_plus)
11810 return false;
11811 disp = XVECEXP (disp, 0, 0);
11812 return (SYMBOL_REF_P (disp)
11813 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11814 case UNSPEC_NTPOFF:
11815 disp = XVECEXP (disp, 0, 0);
11816 return (SYMBOL_REF_P (disp)
11817 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11818 case UNSPEC_DTPOFF:
11819 disp = XVECEXP (disp, 0, 0);
11820 return (SYMBOL_REF_P (disp)
11821 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11822 case UNSPEC_SECREL32:
11823 disp = XVECEXP (disp, 0, 0);
11824 return SYMBOL_REF_P (disp);
11825 }
11826
11827 return false;
11828}
11829
11830/* Determine if op is suitable RTX for an address register.
11831 Return naked register if a register or a register subreg is
11832 found, otherwise return NULL_RTX. */
11833
11834static rtx
11835ix86_validate_address_register (rtx op)
11836{
11837 machine_mode mode = GET_MODE (op);
11838
11839 /* Only SImode or DImode registers can form the address. */
11840 if (mode != SImode && mode != DImode)
11841 return NULL_RTX;
11842
11843 if (REG_P (op))
11844 return op;
11845 else if (SUBREG_P (op))
11846 {
11847 rtx reg = SUBREG_REG (op);
11848
11849 if (!REG_P (reg))
11850 return NULL_RTX;
11851
11852 mode = GET_MODE (reg);
11853
11854 /* Don't allow SUBREGs that span more than a word. It can
11855 lead to spill failures when the register is one word out
11856 of a two word structure. */
11857 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11858 return NULL_RTX;
11859
11860 /* Allow only SUBREGs of non-eliminable hard registers. */
11861 if (register_no_elim_operand (reg, mode))
11862 return reg;
11863 }
11864
11865 /* Op is not a register. */
11866 return NULL_RTX;
11867}
11868
/* Determine which memory address register set insn can use.
   Returns the most restrictive ADDR_* attribute value across the
   insn's enabled alternatives (or for its current alternative).  */

static enum attr_addr
ix86_memory_address_reg_class (rtx_insn* insn)
{
  /* LRA can do some initialization with NULL insn,
     return maximum register class in this case.  */
  enum attr_addr addr_rclass = ADDR_GPR32;

  if (!insn)
    return addr_rclass;

  /* Inline asm gets the class selected by -mapx-inline-asm-use-gpr32.  */
  if (asm_noperands (PATTERN (insn)) >= 0
      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
    return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;

  /* Return maximum register class for unrecognized instructions.  */
  if (INSN_CODE (insn) < 0)
    return addr_rclass;

  /* Try to recognize the insn before calling get_attr_addr.
     Save current recog_data and current alternative, since
     extract_insn_cached and the alternative loop below clobber
     these globals.  */
  struct recog_data_d saved_recog_data = recog_data;
  int saved_alternative = which_alternative;

  /* Update recog_data for processing of alternatives.  */
  extract_insn_cached (insn);

  /* If current alternative is not set, loop through enabled
     alternatives and get the most limited register class.  */
  if (saved_alternative == -1)
    {
      alternative_mask enabled = get_enabled_alternatives (insn);

      for (int i = 0; i < recog_data.n_alternatives; i++)
	{
	  if (!TEST_BIT (enabled, i))
	    continue;

	  which_alternative = i;
	  addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
	}
    }
  else
    {
      which_alternative = saved_alternative;
      addr_rclass = get_attr_addr (insn);
    }

  /* Restore the globals we clobbered above.  */
  recog_data = saved_recog_data;
  which_alternative = saved_alternative;

  return addr_rclass;
}
11923
11924/* Return memory address register class insn can use. */
11925
11926enum reg_class
11927ix86_insn_base_reg_class (rtx_insn* insn)
11928{
11929 switch (ix86_memory_address_reg_class (insn))
11930 {
11931 case ADDR_GPR8:
11932 return LEGACY_GENERAL_REGS;
11933 case ADDR_GPR16:
11934 return GENERAL_GPR16;
11935 case ADDR_GPR32:
11936 break;
11937 default:
11938 gcc_unreachable ();
11939 }
11940
11941 return BASE_REG_CLASS;
11942}
11943
11944bool
11945ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
11946{
11947 switch (ix86_memory_address_reg_class (insn))
11948 {
11949 case ADDR_GPR8:
11950 return LEGACY_INT_REGNO_P (regno);
11951 case ADDR_GPR16:
11952 return GENERAL_GPR16_REGNO_P (regno);
11953 case ADDR_GPR32:
11954 break;
11955 default:
11956 gcc_unreachable ();
11957 }
11958
11959 return GENERAL_REGNO_P (regno);
11960}
11961
11962enum reg_class
11963ix86_insn_index_reg_class (rtx_insn* insn)
11964{
11965 switch (ix86_memory_address_reg_class (insn))
11966 {
11967 case ADDR_GPR8:
11968 return LEGACY_INDEX_REGS;
11969 case ADDR_GPR16:
11970 return INDEX_GPR16;
11971 case ADDR_GPR32:
11972 break;
11973 default:
11974 gcc_unreachable ();
11975 }
11976
11977 return INDEX_REG_CLASS;
11978}
11979
11980/* Recognizes RTL expressions that are valid memory addresses for an
11981 instruction. The MODE argument is the machine mode for the MEM
11982 expression that wants to use this address.
11983
11984 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
11985 convert common non-canonical forms to canonical form so that they will
11986 be recognized. */
11987
11988static bool
11989ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
11990 code_helper = ERROR_MARK)
11991{
11992 struct ix86_address parts;
11993 rtx base, index, disp;
11994 HOST_WIDE_INT scale;
11995 addr_space_t seg;
11996
11997 if (ix86_decompose_address (addr, out: &parts) == 0)
11998 /* Decomposition failed. */
11999 return false;
12000
12001 base = parts.base;
12002 index = parts.index;
12003 disp = parts.disp;
12004 scale = parts.scale;
12005 seg = parts.seg;
12006
12007 /* Validate base register. */
12008 if (base)
12009 {
12010 rtx reg = ix86_validate_address_register (op: base);
12011
12012 if (reg == NULL_RTX)
12013 return false;
12014
12015 unsigned int regno = REGNO (reg);
12016 if ((strict && !REGNO_OK_FOR_BASE_P (regno))
12017 || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
12018 /* Base is not valid. */
12019 return false;
12020 }
12021
12022 /* Validate index register. */
12023 if (index)
12024 {
12025 rtx reg = ix86_validate_address_register (op: index);
12026
12027 if (reg == NULL_RTX)
12028 return false;
12029
12030 unsigned int regno = REGNO (reg);
12031 if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
12032 || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
12033 /* Index is not valid. */
12034 return false;
12035 }
12036
12037 /* Index and base should have the same mode. */
12038 if (base && index
12039 && GET_MODE (base) != GET_MODE (index))
12040 return false;
12041
12042 /* Address override works only on the (%reg) part of %fs:(%reg). */
12043 if (seg != ADDR_SPACE_GENERIC
12044 && ((base && GET_MODE (base) != word_mode)
12045 || (index && GET_MODE (index) != word_mode)))
12046 return false;
12047
12048 /* Validate scale factor. */
12049 if (scale != 1)
12050 {
12051 if (!index)
12052 /* Scale without index. */
12053 return false;
12054
12055 if (scale != 2 && scale != 4 && scale != 8)
12056 /* Scale is not a valid multiplier. */
12057 return false;
12058 }
12059
12060 /* Validate displacement. */
12061 if (disp)
12062 {
12063 if (ix86_endbr_immediate_operand (disp, VOIDmode))
12064 return false;
12065
12066 if (GET_CODE (disp) == CONST
12067 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12068 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12069 switch (XINT (XEXP (disp, 0), 1))
12070 {
12071 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
12072 when used. While ABI specify also 32bit relocations, we
12073 don't produce them at all and use IP relative instead.
12074 Allow GOT in 32bit mode for both PIC and non-PIC if symbol
12075 should be loaded via GOT. */
12076 case UNSPEC_GOT:
12077 if (!TARGET_64BIT
12078 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
12079 goto is_legitimate_pic;
12080 /* FALLTHRU */
12081 case UNSPEC_GOTOFF:
12082 gcc_assert (flag_pic);
12083 if (!TARGET_64BIT)
12084 goto is_legitimate_pic;
12085
12086 /* 64bit address unspec. */
12087 return false;
12088
12089 case UNSPEC_GOTPCREL:
12090 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
12091 goto is_legitimate_pic;
12092 /* FALLTHRU */
12093 case UNSPEC_PCREL:
12094 gcc_assert (flag_pic);
12095 goto is_legitimate_pic;
12096
12097 case UNSPEC_GOTTPOFF:
12098 case UNSPEC_GOTNTPOFF:
12099 case UNSPEC_INDNTPOFF:
12100 case UNSPEC_NTPOFF:
12101 case UNSPEC_DTPOFF:
12102 case UNSPEC_SECREL32:
12103 break;
12104
12105 default:
12106 /* Invalid address unspec. */
12107 return false;
12108 }
12109
12110 else if (SYMBOLIC_CONST (disp)
12111 && (flag_pic
12112#if TARGET_MACHO
12113 || (MACHOPIC_INDIRECT
12114 && !machopic_operand_p (disp))
12115#endif
12116 ))
12117 {
12118
12119 is_legitimate_pic:
12120 if (TARGET_64BIT && (index || base))
12121 {
12122 /* foo@dtpoff(%rX) is ok. */
12123 if (GET_CODE (disp) != CONST
12124 || GET_CODE (XEXP (disp, 0)) != PLUS
12125 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12126 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12127 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12128 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
12129 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
12130 /* Non-constant pic memory reference. */
12131 return false;
12132 }
12133 else if ((!TARGET_MACHO || flag_pic)
12134 && ! legitimate_pic_address_disp_p (disp))
12135 /* Displacement is an invalid pic construct. */
12136 return false;
12137#if TARGET_MACHO
12138 else if (MACHO_DYNAMIC_NO_PIC_P
12139 && !ix86_legitimate_constant_p (Pmode, disp))
12140 /* displacment must be referenced via non_lazy_pointer */
12141 return false;
12142#endif
12143
12144 /* This code used to verify that a symbolic pic displacement
12145 includes the pic_offset_table_rtx register.
12146
12147 While this is good idea, unfortunately these constructs may
12148 be created by "adds using lea" optimization for incorrect
12149 code like:
12150
12151 int a;
12152 int foo(int i)
12153 {
12154 return *(&a+i);
12155 }
12156
12157 This code is nonsensical, but results in addressing
12158 GOT table with pic_offset_table_rtx base. We can't
12159 just refuse it easily, since it gets matched by
12160 "addsi3" pattern, that later gets split to lea in the
12161 case output register differs from input. While this
12162 can be handled by separate addsi pattern for this case
12163 that never results in lea, this seems to be easier and
12164 correct fix for crash to disable this test. */
12165 }
12166 else if (!LABEL_REF_P (disp)
12167 && !CONST_INT_P (disp)
12168 && (GET_CODE (disp) != CONST
12169 || !ix86_legitimate_constant_p (Pmode, x: disp))
12170 && (!SYMBOL_REF_P (disp)
12171 || !ix86_legitimate_constant_p (Pmode, x: disp)))
12172 /* Displacement is not constant. */
12173 return false;
12174 else if (TARGET_64BIT
12175 && !x86_64_immediate_operand (disp, VOIDmode))
12176 /* Displacement is out of range. */
12177 return false;
12178 /* In x32 mode, constant addresses are sign extended to 64bit, so
12179 we have to prevent addresses from 0x80000000 to 0xffffffff. */
12180 else if (TARGET_X32 && !(index || base)
12181 && CONST_INT_P (disp)
12182 && val_signbit_known_set_p (SImode, INTVAL (disp)))
12183 return false;
12184 }
12185
12186 /* Everything looks valid. */
12187 return true;
12188}
12189
12190/* Determine if a given RTX is a valid constant address. */
12191
12192bool
12193constant_address_p (rtx x)
12194{
12195 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, addr: x, strict: 1);
12196}
12197
12198
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  /* dllimport'ed symbols on PE-COFF must be accessed through their
     import stub; try that first.  */
  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
#endif
    }

  /* On 64-bit targets a displacement that is already a legitimate PIC
     displacement needs no further work.  */
  if (TARGET_64BIT && legitimate_pic_address_disp_p (disp: addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
	   && !TARGET_PECOFF
	   && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
	 from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);

      if (GET_CODE (addr) == PLUS)
	{
	  /* Wrap only the symbol part in @GOTOFF and keep the
	     constant offset outside the unspec.  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      if (reg != 0)
	{
	  gcc_assert (REG_P (reg));
	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					 new_rtx, reg, 1, OPTAB_DIRECT);
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't always use @GOTOFF for text labels
	      on VxWorks, see gotoff_operand.  */
	   || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
#endif

      /* For x64 PE-COFF there is no GOT table,
	 so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  /* RIP-relative load of the GOT slot (@GOTPCREL).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
	}
      else
	{
	  /* This symbol must be referenced via a load
	     from the Global Offset Table (@GOT).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (TARGET_64BIT)
	    new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

	  if (reg != 0)
	    {
	      gcc_assert (REG_P (reg));
	      new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					     new_rtx, reg, 1, OPTAB_DIRECT);
	    }
	  else
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
	}

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	/* A constant too wide for an immediate must live in a register.  */
	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant
	     offset from a @GOTOFF symbol reference.  */
	  if (!TARGET_PECOFF
	      && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

		  if (reg != 0)
		    {
		      gcc_assert (REG_P (reg));
		      new_rtx = expand_simple_binop (Pmode, PLUS,
						     pic_offset_table_rtx,
						     new_rtx, reg, 1,
						     OPTAB_DIRECT);
		    }
		  else
		    new_rtx
		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		}
	      else
		{
		  /* Offsets outside +/-16MB are kept out of the
		     displacement; force the pieces into registers.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);

		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize both halves of the PLUS independently and
		 recombine them.  */
	      rtx base = legitimize_pic_address (orig: op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (orig: op1, reg: base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);

		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  /* For %rip addressing, we have to use
		     just disp32, not base nor index.  */
		  if (TARGET_64BIT
		      && (SYMBOL_REF_P (base)
			  || LABEL_REF_P (base)))
		    base = force_reg (mode, base);
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      /* Re-associate so the constant term stays
			 outermost.  */
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
12431
12432/* Load the thread pointer. If TO_REG is true, force it into a register. */
12433
12434static rtx
12435get_thread_pointer (machine_mode tp_mode, bool to_reg)
12436{
12437 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12438
12439 if (GET_MODE (tp) != tp_mode)
12440 {
12441 gcc_assert (GET_MODE (tp) == SImode);
12442 gcc_assert (tp_mode == DImode);
12443
12444 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12445 }
12446
12447 if (to_reg)
12448 tp = copy_to_mode_reg (tp_mode, tp);
12449
12450 return tp;
12451}
12452
12453/* Construct the SYMBOL_REF for the _tls_index symbol. */
12454
12455static GTY(()) rtx ix86_tls_index_symbol;
12456
12457static rtx
12458ix86_tls_index (void)
12459{
12460 if (!ix86_tls_index_symbol)
12461 ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
12462
12463 if (flag_pic)
12464 return gen_rtx_CONST (Pmode,
12465 gen_rtx_UNSPEC (Pmode,
12466 gen_rtvec (1, ix86_tls_index_symbol),
12467 UNSPEC_PCREL));
12468 else
12469 return ix86_tls_index_symbol;
12470}
12471
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

rtx
ix86_tls_get_addr (void)
{
  /* Calling __tls_get_addr is incompatible with the
     no_caller_saved_registers attribute; diagnose that first.  */
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    {
      /* __tls_get_addr doesn't preserve vector registers.  When a
	 function with no_caller_saved_registers attribute calls
	 __tls_get_addr, YMM and ZMM registers will be clobbered.
	 Issue an error and suggest -mtls-dialect=gnu2 in this case.  */
      if (cfun->machine->func_type == TYPE_NORMAL)
	error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
		  " with the %<no_caller_saved_registers%> attribute"));
      else
	error (cfun->machine->func_type == TYPE_EXCEPTION
	       ? G_("%<-mtls-dialect=gnu2%> must be used with an"
		    " exception service routine")
	       : G_("%<-mtls-dialect=gnu2%> must be used with an"
		    " interrupt service routine"));
      /* Don't issue the same error twice.  */
      cfun->machine->func_type = TYPE_NORMAL;
      cfun->machine->call_saved_registers
	= TYPE_DEFAULT_CALL_SAVED_REGISTERS;
    }

  /* Create the symbol lazily; 32-bit GNU TLS uses the
     triple-underscore name.  */
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  /* In the large PIC code model the symbol must be addressed via
     @PLTOFF relative to the PIC register.  */
  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
				   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			   gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}
12520
12521/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12522
12523static GTY(()) rtx ix86_tls_module_base_symbol;
12524
12525rtx
12526ix86_tls_module_base (void)
12527{
12528 if (!ix86_tls_module_base_symbol)
12529 {
12530 ix86_tls_module_base_symbol
12531 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12532
12533 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12534 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12535 }
12536
12537 return ix86_tls_module_base_symbol;
12538}
12539
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Windows implements a single form of TLS.  */
  if (TARGET_WIN32_TLS)
    {
      /* Load the 32-bit index.  */
      rtx ind = gen_const_mem (SImode, ix86_tls_index ());
      set_mem_alias_set (ind, GOT_ALIAS_SET);
      if (TARGET_64BIT)
	ind = convert_to_mode (Pmode, ind, 1);
      ind = force_reg (Pmode, ind);

      /* Add it to the thread pointer and load the base.  */
      tp = get_thread_pointer (Pmode, to_reg: true);
      rtx addr = gen_rtx_PLUS (Pmode, tp,
			       gen_rtx_MULT (Pmode, ind,
					     GEN_INT (UNITS_PER_WORD)));
      base = gen_const_mem (Pmode, addr);
      set_mem_alias_set (base, GOT_ALIAS_SET);

      /* Add the 32-bit section-relative offset to the base.  */
      base = force_reg (Pmode, base);
      off = gen_rtx_CONST (Pmode,
			   gen_rtx_UNSPEC (SImode,
					   gen_rtvec (1, x),
					   UNSPEC_SECREL32));
      return gen_rtx_PLUS (Pmode, base, off);
    }

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* 32-bit GD needs the PIC register (or an ad-hoc GOT pointer)
	 for the call to the TLS resolver.  */
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 (TLSDESC) sequence: the descriptor call yields the
	     offset from the thread pointer.  */
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (arg0: ptr_mode, x0: dest, x1: x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (tp_mode: ptr_mode, to_reg: true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	    dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  /* Classic GD: call __tls_get_addr.  */
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx rdi = gen_rtx_REG (Pmode, DI_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, x0: rax, x1: x, x2: caddr, x3: rdi));
	      insns = end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      /* The call computes a pure value; let it be CSEd.  */
	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* As for GD, 32-bit LD needs a GOT pointer for the call.  */
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 LD: resolve _TLS_MODULE_BASE_ via a descriptor call.  */
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (arg0: ptr_mode, x0: base, x1: tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (tp_mode: ptr_mode, to_reg: true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  /* Classic LD: call __tls_get_addr for the module base.  */
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx rdi = gen_rtx_REG (Pmode, DI_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, x0: rax, x1: caddr, x2: rdi));
	      insns = end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      /* The variable's address is the module base plus its @dtpoff
	 offset.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 results are TP-relative; add the thread pointer,
	     handling a narrower tp mode by subreg + zero-extend.  */
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (outermode: ptr_mode, op: dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* Select how the GOT entry holding the TP offset is addressed.  */
      if (TARGET_64BIT)
	{
	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      /* Load the thread-pointer offset from the GOT.  */
      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, GOT_ALIAS_SET);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     to_reg: for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  /* Non-GNU TLS offsets are subtracted from the thread
	     pointer.  */
	  base = get_thread_pointer (Pmode, to_reg: true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* The offset is a link-time constant (@tpoff / @ntpoff).  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     to_reg: for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, to_reg: true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
12802
12803/* Return true if the TLS address requires insn using integer registers.
12804 It's used to prevent KMOV/VMOV in TLS code sequences which require integer
12805 MOV instructions, refer to PR103275. */
12806bool
12807ix86_gpr_tls_address_pattern_p (rtx mem)
12808{
12809 gcc_assert (MEM_P (mem));
12810
12811 rtx addr = XEXP (mem, 0);
12812 subrtx_var_iterator::array_type array;
12813 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
12814 {
12815 rtx op = *iter;
12816 if (GET_CODE (op) == UNSPEC)
12817 switch (XINT (op, 1))
12818 {
12819 case UNSPEC_GOTNTPOFF:
12820 return true;
12821 case UNSPEC_TPOFF:
12822 if (!TARGET_64BIT)
12823 return true;
12824 break;
12825 default:
12826 break;
12827 }
12828 }
12829
12830 return false;
12831}
12832
/* Return true if OP refers to a TLS address.  */
bool
ix86_tls_address_pattern_p (rtx op)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
    {
      rtx op = *iter;
      if (MEM_P (op))
	{
	  /* Walk down the left spine of PLUS expressions in the
	     address, checking both operands of each PLUS for the
	     UNSPEC_TP thread pointer, possibly wrapped in a
	     ZERO_EXTEND.  */
	  rtx *x = &XEXP (op, 0);
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    return true;
		}
	      x = &XEXP (*x, 0);
	    }

	  /* The address has been fully examined above; don't descend
	     into the MEM again.  */
	  iter.skip_subrtxes ();
	}
    }

  return false;
}
12865
/* Rewrite *LOC so that it refers to a default TLS address space.  */
static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
	{
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  /* Look for an UNSPEC_TP term (possibly zero-extended) in a
	     chain of PLUS expressions; when found, strip it from the
	     address and move it into the MEM's address space.  */
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      /* NB: Since address override only applies to the
			 (reg32) part in fs:(reg32), return if address
			 override is used.  */
		      if (Pmode != word_mode
			  && REG_P (XEXP (*x, 1 - i)))
			return;

		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      /* Replace the PLUS with its non-TP operand.  */
		      *x = XEXP (*x, 1 - i);

		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }
}
12912
12913/* Rewrite instruction pattern involvning TLS address
12914 so that it refers to a default TLS address space. */
12915rtx
12916ix86_rewrite_tls_address (rtx pattern)
12917{
12918 pattern = copy_insn (pattern);
12919 ix86_rewrite_tls_address_1 (loc: &pattern);
12920 return pattern;
12921}
12922
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.cc for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  /* TLS symbols need a model-specific address sequence.  */
  log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, model: (enum tls_model) log, for_mov: false);
  /* Likewise for (const (plus (symbol_ref tls) (const_int))).  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      model: (enum tls_model) log, for_mov: false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
#endif
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (orig: x, reg: 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Fold whichever of the two constants is a CONST_INT into
	     the other term.  */
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
	return x;

      /* MULT terms that remain must live in registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (orig: x, reg: 0);
	}

      if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
	return x;

      /* As a last resort, force the non-register operand into a
	 register.  */
      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
13129
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." denotes the current location in the output.  */
      gcc_assert (flag_pic);
      putc (c: '.', stream: file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ix86_call_use_plt_p (x))
	fputs (s: "@PLT", stream: file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    CASE_CONST_SCALAR_INT:
      output_addr_const (file, x);
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc (c: '+', stream: file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc (c: '+', stream: file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      /* Bracket the difference so the assembler parses it as one
	 expression; the bracket style depends on the dialect.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', stream: file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc (c: '-', stream: file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', stream: file);
      break;

    case UNSPEC:
      /* Print the operand followed by the relocation suffix that the
	 unspec denotes.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs (s: "@GOT", stream: file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs (s: "@GOTOFF", stream: file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs (s: "@PLTOFF", stream: file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", stream: file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", stream: file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs (s: "@gottpoff", stream: file);
	  break;
	case UNSPEC_TPOFF:
	  fputs (s: "@tpoff", stream: file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs (s: "@tpoff", stream: file);
	  else
	    fputs (s: "@ntpoff", stream: file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs (s: "@dtpoff", stream: file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", stream: file);
	  else
	    fputs (s: "@gotntpoff", stream: file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs (s: "@indntpoff", stream: file);
	  break;
	case UNSPEC_SECREL32:
	  fputs (s: "@secrel32", stream: file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
13289
13290/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13291 We need to emit DTP-relative relocations. */
13292
13293static void ATTRIBUTE_UNUSED
13294i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13295{
13296 fputs (ASM_LONG, stream: file);
13297 output_addr_const (file, x);
13298#if TARGET_WIN32_TLS
13299 fputs ("@secrel32", file);
13300#else
13301 fputs (s: "@dtpoff", stream: file);
13302#endif
13303 switch (size)
13304 {
13305 case 4:
13306 break;
13307 case 8:
13308 fputs (s: ", 0", stream: file);
13309 break;
13310 default:
13311 gcc_unreachable ();
13312 }
13313}
13314
13315/* Return true if X is a representation of the PIC register. This copes
13316 with calls from ix86_find_base_term, where the register might have
13317 been replaced by a cselib value. */
13318
13319static bool
13320ix86_pic_register_p (rtx x)
13321{
13322 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13323 return (pic_offset_table_rtx
13324 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13325 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
13326 return true;
13327 else if (!REG_P (x))
13328 return false;
13329 else if (pic_offset_table_rtx)
13330 {
13331 if (REGNO (x) == REGNO (pic_offset_table_rtx))
13332 return true;
13333 if (HARD_REGISTER_P (x)
13334 && !HARD_REGISTER_P (pic_offset_table_rtx)
13335 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
13336 return true;
13337 return false;
13338 }
13339 else
13340 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13341}
13342
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses, i.e. rewrite
   %fs/%gs-segment addresses of the form seg:sym@NTPOFF(base,index,scale)
   back into plain symbol-based addresses.  Returns ORIG_X unchanged
   when the pattern does not match.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  /* Such addresses are only generated when direct segment references
     are enabled.  */
  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  /* The address must decompose with the TLS segment override and a
     CONST displacement wrapping the NTPOFF unspec.  */
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* Skip over an optional integer addend to reach the UNSPEC itself.  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (SYMBOL_REF_P (x));
  /* Re-attach the constant addend that was stripped above, if any.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  /* Rebuild index*scale and base around the bare symbol.  */
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  /* Preserve the MEM wrapper (and its attributes) when ORIG_X had one.  */
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
13385
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.

   This helper is used in the normal ix86_delegitimize_address
   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As compile time memory optimization, we
   avoid allocating rtxes that will not change anything on the outcome
   of the callers (find_base_value and find_base_term).  */

static inline rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* Recognize (const (plus (unspec [sym] UNSPEC_PCREL) off))
	 and rewrite it as off + sym.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base.  A CONST can't be arg_pointer_rtx based.  */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      /* Strip a bare @GOTPCREL (only inside a MEM) or %rip-relative
	 UNSPEC down to the wrapped symbol.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  /* A narrower MEM needs the matching lowpart of the symbol.  */
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  /* From here on we expect reg(+reg*scale) + (const ...@GOT/GOTOFF).  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  /* Neither operand is the PIC register: keep the whole PLUS
	     as an opaque addend to be compensated for below.  */
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  /* Peel the CONST wrapper and an optional integer offset.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  /* Only these UNSPECs, in the listed contexts, wrap a real symbol.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed.  Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_VAROFF)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  /* If the original was a narrower MEM, return the matching lowpart.  */
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
13553
13554/* The normal instantiation of the above template. */
13555
13556static rtx
13557ix86_delegitimize_address (rtx x)
13558{
13559 return ix86_delegitimize_address_1 (x, base_term_p: false);
13560}
13561
13562/* If X is a machine specific address (i.e. a symbol or label being
13563 referenced as a displacement from the GOT implemented using an
13564 UNSPEC), then return the base term. Otherwise return X. */
13565
13566rtx
13567ix86_find_base_term (rtx x)
13568{
13569 rtx term;
13570
13571 if (TARGET_64BIT)
13572 {
13573 if (GET_CODE (x) != CONST)
13574 return x;
13575 term = XEXP (x, 0);
13576 if (GET_CODE (term) == PLUS
13577 && CONST_INT_P (XEXP (term, 1)))
13578 term = XEXP (term, 0);
13579 if (GET_CODE (term) != UNSPEC
13580 || (XINT (term, 1) != UNSPEC_GOTPCREL
13581 && XINT (term, 1) != UNSPEC_PCREL))
13582 return x;
13583
13584 return XVECEXP (term, 0, 0);
13585 }
13586
13587 return ix86_delegitimize_address_1 (x, base_term_p: true);
13588}
13589
13590/* Return true if X shouldn't be emitted into the debug info.
13591 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
13592 symbol easily into the .debug_info section, so we need not to
13593 delegitimize, but instead assemble as @gotoff.
13594 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13595 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
13596
13597static bool
13598ix86_const_not_ok_for_debug_p (rtx x)
13599{
13600 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13601 return true;
13602
13603 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13604 return true;
13605
13606 return false;
13607}
13608
/* Print to FILE the one- or two-letter condition suffix ("e", "ne",
   "l", "be", ...) for comparison CODE performed in CC mode MODE.
   If REVERSE, print the suffix of the reversed condition instead.
   FP selects alternate spellings used with x87/SSE conditional-move
   style instructions, where some assemblers need e.g. "nbe" rather
   than "a".  */

static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  /* FP comparisons are printed via their integer-flag equivalents.  */
  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	/* In the single-flag CC modes, EQ tests that flag directly.  */
	case E_CCAmode:
	  suffix = "a";
	  break;
	case E_CCCmode:
	  suffix = "c";
	  break;
	case E_CCOmode:
	  suffix = "o";
	  break;
	case E_CCPmode:
	  suffix = "p";
	  break;
	case E_CCSmode:
	  suffix = "s";
	  break;
	default:
	  suffix = "e";
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	/* Likewise NE tests the single flag's complement.  */
	case E_CCAmode:
	  suffix = "na";
	  break;
	case E_CCCmode:
	  suffix = "nc";
	  break;
	case E_CCOmode:
	  suffix = "no";
	  break;
	case E_CCPmode:
	  suffix = "np";
	  break;
	case E_CCSmode:
	  suffix = "ns";
	  break;
	default:
	  suffix = "ne";
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	/* When only the sign flag is meaningful, LT is a sign test.  */
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "s";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b";
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c";
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	/* Dual of the LT case: GE becomes a not-sign test.  */
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "ns";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc";
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
13758
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
   If CODE is 'V', print naked full integer register name without %.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;
  unsigned int regno;
  bool duplicated;

  /* AT&T syntax prefixes register names with '%'; 'V' suppresses it.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc ('%', file);

  /* pc_rtx is only meaningful as %rip in 64-bit mode.  */
  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }

  /* Map the modifier letter to an operand size in bytes; 0 means the
     "high" byte register, and no modifier means the operand's mode.  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* These registers never appear in emitted assembly.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  /* 'V' forces the full-width integer name regardless of mode.  */
  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register");
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers get an 'e' (32-bit) or 'r' (64-bit)
	 prefix in front of the 16-bit name looked up below.  */
      if (LEGACY_INT_REGNO_P (regno))
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      /* 'h' modifier: ah/bh/ch/dh.  */
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      /* SSE registers: replace the leading 'x' of "xmmN" with 'y'
	 (256-bit) or 'z' (512-bit) by skipping the first character.  */
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (msize == 32 ? 'y' : 'z', file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]" */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  /* 'd' with AVX: emit the register a second time as a separate
     operand, e.g. "xmm0, %xmm0".  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
13923
13924/* Meaning of CODE:
13925 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13926 C -- print opcode suffix for set/cmov insn.
13927 c -- like C, but print reversed condition
13928 F,f -- likewise, but for floating-point.
13929 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13930 otherwise nothing
13931 R -- print embedded rounding and sae.
13932 r -- print only sae.
13933 z -- print the opcode suffix for the size of the current operand.
13934 Z -- likewise, with special suffixes for x87 instructions.
13935 * -- print a star (in certain assembler syntax)
13936 A -- print an absolute memory reference.
13937 E -- print address with DImode register names if TARGET_64BIT.
13938 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13939 s -- print a shift double count, followed by the assemblers argument
13940 delimiter.
13941 b -- print the QImode name of the register for the indicated operand.
13942 %b0 would print %al if operands[0] is reg 0.
13943 w -- likewise, print the HImode name of the register.
13944 k -- likewise, print the SImode name of the register.
13945 q -- likewise, print the DImode name of the register.
13946 x -- likewise, print the V4SFmode name of the register.
13947 t -- likewise, print the V8SFmode name of the register.
13948 g -- likewise, print the V16SFmode name of the register.
13949 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13950 y -- print "st(0)" instead of "st" as a register.
13951 d -- print duplicated register operand for AVX instruction.
13952 D -- print condition for SSE cmp instruction.
13953 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13954 address from GOT.
13955 p -- print raw symbol name.
13956 X -- don't print any sort of PIC '@' suffix for a symbol.
13957 & -- print some in-use local-dynamic symbol name.
13958 H -- print a memory address offset by 8; used for sse high-parts
13959 Y -- print condition for XOP pcom* instruction.
13960 V -- print naked full integer register name without %.
13961 v -- print segment override prefix
13962 + -- print a branch hint as 'cs' or 'ds' prefix
13963 ; -- print a semicolon (after prefixes due to bug in older gas).
13964 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13965 ^ -- print addr32 prefix if Pmode != word_mode
13966 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13967 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13968 N -- print maskz if it's constant 0 operand.
13969 G -- print embedded flag for ccmp/ctest.
13970 */
13971
13972void
13973ix86_print_operand (FILE *file, rtx x, int code)
13974{
13975 if (code)
13976 {
13977 switch (code)
13978 {
13979 case 'A':
13980 switch (ASSEMBLER_DIALECT)
13981 {
13982 case ASM_ATT:
13983 putc (c: '*', stream: file);
13984 break;
13985
13986 case ASM_INTEL:
13987 /* Intel syntax. For absolute addresses, registers should not
13988 be surrounded by braces. */
13989 if (!REG_P (x))
13990 {
13991 putc (c: '[', stream: file);
13992 ix86_print_operand (file, x, code: 0);
13993 putc (c: ']', stream: file);
13994 return;
13995 }
13996 break;
13997
13998 default:
13999 gcc_unreachable ();
14000 }
14001
14002 ix86_print_operand (file, x, code: 0);
14003 return;
14004
14005 case 'E':
14006 /* Wrap address in an UNSPEC to declare special handling. */
14007 if (TARGET_64BIT)
14008 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14009
14010 output_address (VOIDmode, x);
14011 return;
14012
14013 case 'L':
14014 if (ASSEMBLER_DIALECT == ASM_ATT)
14015 putc (c: 'l', stream: file);
14016 return;
14017
14018 case 'W':
14019 if (ASSEMBLER_DIALECT == ASM_ATT)
14020 putc (c: 'w', stream: file);
14021 return;
14022
14023 case 'B':
14024 if (ASSEMBLER_DIALECT == ASM_ATT)
14025 putc (c: 'b', stream: file);
14026 return;
14027
14028 case 'Q':
14029 if (ASSEMBLER_DIALECT == ASM_ATT)
14030 putc (c: 'l', stream: file);
14031 return;
14032
14033 case 'S':
14034 if (ASSEMBLER_DIALECT == ASM_ATT)
14035 putc (c: 's', stream: file);
14036 return;
14037
14038 case 'T':
14039 if (ASSEMBLER_DIALECT == ASM_ATT)
14040 putc (c: 't', stream: file);
14041 return;
14042
14043 case 'O':
14044#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14045 if (ASSEMBLER_DIALECT != ASM_ATT)
14046 return;
14047
14048 switch (GET_MODE_SIZE (GET_MODE (x)))
14049 {
14050 case 2:
14051 putc ('w', file);
14052 break;
14053
14054 case 4:
14055 putc ('l', file);
14056 break;
14057
14058 case 8:
14059 putc ('q', file);
14060 break;
14061
14062 default:
14063 output_operand_lossage ("invalid operand size for operand "
14064 "code 'O'");
14065 return;
14066 }
14067
14068 putc ('.', file);
14069#endif
14070 return;
14071
14072 case 'z':
14073 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14074 {
14075 /* Opcodes don't get size suffixes if using Intel opcodes. */
14076 if (ASSEMBLER_DIALECT == ASM_INTEL)
14077 return;
14078
14079 switch (GET_MODE_SIZE (GET_MODE (x)))
14080 {
14081 case 1:
14082 putc (c: 'b', stream: file);
14083 return;
14084
14085 case 2:
14086 putc (c: 'w', stream: file);
14087 return;
14088
14089 case 4:
14090 putc (c: 'l', stream: file);
14091 return;
14092
14093 case 8:
14094 putc (c: 'q', stream: file);
14095 return;
14096
14097 default:
14098 output_operand_lossage ("invalid operand size for operand "
14099 "code 'z'");
14100 return;
14101 }
14102 }
14103
14104 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14105 {
14106 if (this_is_asm_operands)
14107 warning_for_asm (this_is_asm_operands,
14108 "non-integer operand used with operand code %<z%>");
14109 else
14110 warning (0, "non-integer operand used with operand code %<z%>");
14111 }
14112 /* FALLTHRU */
14113
14114 case 'Z':
14115 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14116 if (ASSEMBLER_DIALECT == ASM_INTEL)
14117 return;
14118
14119 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14120 {
14121 switch (GET_MODE_SIZE (GET_MODE (x)))
14122 {
14123 case 2:
14124#ifdef HAVE_AS_IX86_FILDS
14125 putc (c: 's', stream: file);
14126#endif
14127 return;
14128
14129 case 4:
14130 putc (c: 'l', stream: file);
14131 return;
14132
14133 case 8:
14134#ifdef HAVE_AS_IX86_FILDQ
14135 putc (c: 'q', stream: file);
14136#else
14137 fputs ("ll", file);
14138#endif
14139 return;
14140
14141 default:
14142 break;
14143 }
14144 }
14145 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14146 {
14147 /* 387 opcodes don't get size suffixes
14148 if the operands are registers. */
14149 if (STACK_REG_P (x))
14150 return;
14151
14152 switch (GET_MODE_SIZE (GET_MODE (x)))
14153 {
14154 case 4:
14155 putc (c: 's', stream: file);
14156 return;
14157
14158 case 8:
14159 putc (c: 'l', stream: file);
14160 return;
14161
14162 case 12:
14163 case 16:
14164 putc (c: 't', stream: file);
14165 return;
14166
14167 default:
14168 break;
14169 }
14170 }
14171 else
14172 {
14173 output_operand_lossage ("invalid operand type used with "
14174 "operand code '%c'", code);
14175 return;
14176 }
14177
14178 output_operand_lossage ("invalid operand size for operand code '%c'",
14179 code);
14180 return;
14181
14182 case 'd':
14183 case 'b':
14184 case 'w':
14185 case 'k':
14186 case 'q':
14187 case 'h':
14188 case 't':
14189 case 'g':
14190 case 'y':
14191 case 'x':
14192 case 'X':
14193 case 'P':
14194 case 'p':
14195 case 'V':
14196 break;
14197
14198 case 's':
14199 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14200 {
14201 ix86_print_operand (file, x, code: 0);
14202 fputs (s: ", ", stream: file);
14203 }
14204 return;
14205
14206 case 'Y':
14207 switch (GET_CODE (x))
14208 {
14209 case NE:
14210 fputs (s: "neq", stream: file);
14211 break;
14212 case EQ:
14213 fputs (s: "eq", stream: file);
14214 break;
14215 case GE:
14216 case GEU:
14217 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", stream: file);
14218 break;
14219 case GT:
14220 case GTU:
14221 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", stream: file);
14222 break;
14223 case LE:
14224 case LEU:
14225 fputs (s: "le", stream: file);
14226 break;
14227 case LT:
14228 case LTU:
14229 fputs (s: "lt", stream: file);
14230 break;
14231 case UNORDERED:
14232 fputs (s: "unord", stream: file);
14233 break;
14234 case ORDERED:
14235 fputs (s: "ord", stream: file);
14236 break;
14237 case UNEQ:
14238 fputs (s: "ueq", stream: file);
14239 break;
14240 case UNGE:
14241 fputs (s: "nlt", stream: file);
14242 break;
14243 case UNGT:
14244 fputs (s: "nle", stream: file);
14245 break;
14246 case UNLE:
14247 fputs (s: "ule", stream: file);
14248 break;
14249 case UNLT:
14250 fputs (s: "ult", stream: file);
14251 break;
14252 case LTGT:
14253 fputs (s: "une", stream: file);
14254 break;
14255 default:
14256 output_operand_lossage ("operand is not a condition code, "
14257 "invalid operand code 'Y'");
14258 return;
14259 }
14260 return;
14261
14262 case 'D':
14263 /* Little bit of braindamage here. The SSE compare instructions
14264 does use completely different names for the comparisons that the
14265 fp conditional moves. */
14266 switch (GET_CODE (x))
14267 {
14268 case UNEQ:
14269 if (TARGET_AVX)
14270 {
14271 fputs (s: "eq_us", stream: file);
14272 break;
14273 }
14274 /* FALLTHRU */
14275 case EQ:
14276 fputs (s: "eq", stream: file);
14277 break;
14278 case UNLT:
14279 if (TARGET_AVX)
14280 {
14281 fputs (s: "nge", stream: file);
14282 break;
14283 }
14284 /* FALLTHRU */
14285 case LT:
14286 fputs (s: "lt", stream: file);
14287 break;
14288 case UNLE:
14289 if (TARGET_AVX)
14290 {
14291 fputs (s: "ngt", stream: file);
14292 break;
14293 }
14294 /* FALLTHRU */
14295 case LE:
14296 fputs (s: "le", stream: file);
14297 break;
14298 case UNORDERED:
14299 fputs (s: "unord", stream: file);
14300 break;
14301 case LTGT:
14302 if (TARGET_AVX)
14303 {
14304 fputs (s: "neq_oq", stream: file);
14305 break;
14306 }
14307 /* FALLTHRU */
14308 case NE:
14309 fputs (s: "neq", stream: file);
14310 break;
14311 case GE:
14312 if (TARGET_AVX)
14313 {
14314 fputs (s: "ge", stream: file);
14315 break;
14316 }
14317 /* FALLTHRU */
14318 case UNGE:
14319 fputs (s: "nlt", stream: file);
14320 break;
14321 case GT:
14322 if (TARGET_AVX)
14323 {
14324 fputs (s: "gt", stream: file);
14325 break;
14326 }
14327 /* FALLTHRU */
14328 case UNGT:
14329 fputs (s: "nle", stream: file);
14330 break;
14331 case ORDERED:
14332 fputs (s: "ord", stream: file);
14333 break;
14334 default:
14335 output_operand_lossage ("operand is not a condition code, "
14336 "invalid operand code 'D'");
14337 return;
14338 }
14339 return;
14340
14341 case 'F':
14342 case 'f':
14343#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14344 if (ASSEMBLER_DIALECT == ASM_ATT)
14345 putc ('.', file);
14346 gcc_fallthrough ();
14347#endif
14348
14349 case 'C':
14350 case 'c':
14351 if (!COMPARISON_P (x))
14352 {
14353 output_operand_lossage ("operand is not a condition code, "
14354 "invalid operand code '%c'", code);
14355 return;
14356 }
14357 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14358 reverse: code == 'c' || code == 'f',
14359 fp: code == 'F' || code == 'f',
14360 file);
14361 return;
14362
14363 case 'G':
14364 {
14365 int dfv = INTVAL (x);
14366 const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
14367 fputs (s: dfv_suffix, stream: file);
14368 }
14369 return;
14370
14371 case 'H':
14372 if (!offsettable_memref_p (x))
14373 {
14374 output_operand_lossage ("operand is not an offsettable memory "
14375 "reference, invalid operand code 'H'");
14376 return;
14377 }
14378 /* It doesn't actually matter what mode we use here, as we're
14379 only going to use this for printing. */
14380 x = adjust_address_nv (x, DImode, 8);
14381 /* Output 'qword ptr' for intel assembler dialect. */
14382 if (ASSEMBLER_DIALECT == ASM_INTEL)
14383 code = 'q';
14384 break;
14385
14386 case 'K':
14387 if (!CONST_INT_P (x))
14388 {
14389 output_operand_lossage ("operand is not an integer, invalid "
14390 "operand code 'K'");
14391 return;
14392 }
14393
14394 if (INTVAL (x) & IX86_HLE_ACQUIRE)
14395#ifdef HAVE_AS_IX86_HLE
14396 fputs (s: "xacquire ", stream: file);
14397#else
14398 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14399#endif
14400 else if (INTVAL (x) & IX86_HLE_RELEASE)
14401#ifdef HAVE_AS_IX86_HLE
14402 fputs (s: "xrelease ", stream: file);
14403#else
14404 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14405#endif
14406 /* We do not want to print value of the operand. */
14407 return;
14408
14409 case 'N':
14410 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14411 fputs (s: "{z}", stream: file);
14412 return;
14413
14414 case 'r':
14415 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14416 {
14417 output_operand_lossage ("operand is not a specific integer, "
14418 "invalid operand code 'r'");
14419 return;
14420 }
14421
14422 if (ASSEMBLER_DIALECT == ASM_INTEL)
14423 fputs (s: ", ", stream: file);
14424
14425 fputs (s: "{sae}", stream: file);
14426
14427 if (ASSEMBLER_DIALECT == ASM_ATT)
14428 fputs (s: ", ", stream: file);
14429
14430 return;
14431
14432 case 'R':
14433 if (!CONST_INT_P (x))
14434 {
14435 output_operand_lossage ("operand is not an integer, invalid "
14436 "operand code 'R'");
14437 return;
14438 }
14439
14440 if (ASSEMBLER_DIALECT == ASM_INTEL)
14441 fputs (s: ", ", stream: file);
14442
14443 switch (INTVAL (x))
14444 {
14445 case ROUND_NEAREST_INT | ROUND_SAE:
14446 fputs (s: "{rn-sae}", stream: file);
14447 break;
14448 case ROUND_NEG_INF | ROUND_SAE:
14449 fputs (s: "{rd-sae}", stream: file);
14450 break;
14451 case ROUND_POS_INF | ROUND_SAE:
14452 fputs (s: "{ru-sae}", stream: file);
14453 break;
14454 case ROUND_ZERO | ROUND_SAE:
14455 fputs (s: "{rz-sae}", stream: file);
14456 break;
14457 default:
14458 output_operand_lossage ("operand is not a specific integer, "
14459 "invalid operand code 'R'");
14460 }
14461
14462 if (ASSEMBLER_DIALECT == ASM_ATT)
14463 fputs (s: ", ", stream: file);
14464
14465 return;
14466
14467 case 'v':
14468 if (MEM_P (x))
14469 {
14470 switch (MEM_ADDR_SPACE (x))
14471 {
14472 case ADDR_SPACE_GENERIC:
14473 break;
14474 case ADDR_SPACE_SEG_FS:
14475 fputs (s: "fs ", stream: file);
14476 break;
14477 case ADDR_SPACE_SEG_GS:
14478 fputs (s: "gs ", stream: file);
14479 break;
14480 default:
14481 gcc_unreachable ();
14482 }
14483 }
14484 else
14485 output_operand_lossage ("operand is not a memory reference, "
14486 "invalid operand code 'v'");
14487 return;
14488
14489 case '*':
14490 if (ASSEMBLER_DIALECT == ASM_ATT)
14491 putc (c: '*', stream: file);
14492 return;
14493
14494 case '&':
14495 {
14496 const char *name = get_some_local_dynamic_name ();
14497 if (name == NULL)
14498 output_operand_lossage ("'%%&' used without any "
14499 "local dynamic TLS references");
14500 else
14501 assemble_name (file, name);
14502 return;
14503 }
14504
14505 case '+':
14506 {
14507 rtx x;
14508
14509 if (!optimize
14510 || optimize_function_for_size_p (cfun)
14511 || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
14512 && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
14513 return;
14514
14515 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14516 if (x)
14517 {
14518 int pred_val = profile_probability::from_reg_br_prob_note
14519 (XINT (x, 0)).to_reg_br_prob_base ();
14520
14521 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14522 /* We use 3e (DS) prefix for taken branches and
14523 2e (CS) prefix for not taken branches. */
14524 if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
14525 fputs (s: "ds ; ", stream: file);
14526 else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
14527 fputs (s: "cs ; ", stream: file);
14528 }
14529 return;
14530 }
14531
14532 case ';':
14533#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14534 putc (';', file);
14535#endif
14536 return;
14537
14538 case '~':
14539 putc (TARGET_AVX2 ? 'i' : 'f', stream: file);
14540 return;
14541
14542 case 'M':
14543 if (TARGET_X32)
14544 {
14545 /* NB: 32-bit indices in VSIB address are sign-extended
14546 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
14547 sign-extended to 0xfffffffff7fa3010 which is invalid
14548 address. Add addr32 prefix if there is no base
14549 register nor symbol. */
14550 bool ok;
14551 struct ix86_address parts;
14552 ok = ix86_decompose_address (addr: x, out: &parts);
14553 gcc_assert (ok && parts.index == NULL_RTX);
14554 if (parts.base == NULL_RTX
14555 && (parts.disp == NULL_RTX
14556 || !symbolic_operand (parts.disp,
14557 GET_MODE (parts.disp))))
14558 fputs (s: "addr32 ", stream: file);
14559 }
14560 return;
14561
14562 case '^':
14563 if (Pmode != word_mode)
14564 fputs (s: "addr32 ", stream: file);
14565 return;
14566
14567 case '!':
14568 if (ix86_notrack_prefixed_insn_p (current_output_insn))
14569 fputs (s: "notrack ", stream: file);
14570 return;
14571
14572 default:
14573 output_operand_lossage ("invalid operand code '%c'", code);
14574 }
14575 }
14576
14577 if (REG_P (x))
14578 print_reg (x, code, file);
14579
14580 else if (MEM_P (x))
14581 {
14582 rtx addr = XEXP (x, 0);
14583
14584 /* No `byte ptr' prefix for call instructions ... */
14585 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14586 {
14587 machine_mode mode = GET_MODE (x);
14588 const char *size;
14589
14590 /* Check for explicit size override codes. */
14591 if (code == 'b')
14592 size = "BYTE";
14593 else if (code == 'w')
14594 size = "WORD";
14595 else if (code == 'k')
14596 size = "DWORD";
14597 else if (code == 'q')
14598 size = "QWORD";
14599 else if (code == 'x')
14600 size = "XMMWORD";
14601 else if (code == 't')
14602 size = "YMMWORD";
14603 else if (code == 'g')
14604 size = "ZMMWORD";
14605 else if (mode == BLKmode)
14606 /* ... or BLKmode operands, when not overridden. */
14607 size = NULL;
14608 else
14609 switch (GET_MODE_SIZE (mode))
14610 {
14611 case 1: size = "BYTE"; break;
14612 case 2: size = "WORD"; break;
14613 case 4: size = "DWORD"; break;
14614 case 8: size = "QWORD"; break;
14615 case 12: size = "TBYTE"; break;
14616 case 16:
14617 if (mode == XFmode)
14618 size = "TBYTE";
14619 else
14620 size = "XMMWORD";
14621 break;
14622 case 32: size = "YMMWORD"; break;
14623 case 64: size = "ZMMWORD"; break;
14624 default:
14625 gcc_unreachable ();
14626 }
14627 if (size)
14628 {
14629 fputs (s: size, stream: file);
14630 fputs (s: " PTR ", stream: file);
14631 }
14632 }
14633
14634 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14635 output_operand_lossage ("invalid constraints for operand");
14636 else
14637 ix86_print_operand_address_as
14638 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14639 }
14640
14641 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14642 {
14643 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14644 REAL_MODE_FORMAT (HFmode));
14645 if (ASSEMBLER_DIALECT == ASM_ATT)
14646 putc (c: '$', stream: file);
14647 fprintf (stream: file, format: "0x%04x", (unsigned int) l);
14648 }
14649
14650 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14651 {
14652 long l;
14653
14654 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14655
14656 if (ASSEMBLER_DIALECT == ASM_ATT)
14657 putc (c: '$', stream: file);
14658 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14659 if (code == 'q')
14660 fprintf (stream: file, format: "0x%08" HOST_LONG_LONG_FORMAT "x",
14661 (unsigned long long) (int) l);
14662 else
14663 fprintf (stream: file, format: "0x%08x", (unsigned int) l);
14664 }
14665
14666 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14667 {
14668 long l[2];
14669
14670 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14671
14672 if (ASSEMBLER_DIALECT == ASM_ATT)
14673 putc (c: '$', stream: file);
14674 fprintf (stream: file, format: "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14675 }
14676
14677 /* These float cases don't actually occur as immediate operands. */
14678 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14679 {
14680 char dstr[30];
14681
14682 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14683 fputs (s: dstr, stream: file);
14684 }
14685
14686 /* Print bcst_mem_operand. */
14687 else if (GET_CODE (x) == VEC_DUPLICATE)
14688 {
14689 machine_mode vmode = GET_MODE (x);
14690 /* Must be bcst_memory_operand. */
14691 gcc_assert (bcst_mem_operand (x, vmode));
14692
14693 rtx mem = XEXP (x,0);
14694 ix86_print_operand (file, x: mem, code: 0);
14695
14696 switch (vmode)
14697 {
14698 case E_V2DImode:
14699 case E_V2DFmode:
14700 fputs (s: "{1to2}", stream: file);
14701 break;
14702 case E_V4SImode:
14703 case E_V4SFmode:
14704 case E_V4DImode:
14705 case E_V4DFmode:
14706 fputs (s: "{1to4}", stream: file);
14707 break;
14708 case E_V8SImode:
14709 case E_V8SFmode:
14710 case E_V8DFmode:
14711 case E_V8DImode:
14712 case E_V8HFmode:
14713 fputs (s: "{1to8}", stream: file);
14714 break;
14715 case E_V16SFmode:
14716 case E_V16SImode:
14717 case E_V16HFmode:
14718 fputs (s: "{1to16}", stream: file);
14719 break;
14720 case E_V32HFmode:
14721 fputs (s: "{1to32}", stream: file);
14722 break;
14723 default:
14724 gcc_unreachable ();
14725 }
14726 }
14727
14728 else
14729 {
14730 /* We have patterns that allow zero sets of memory, for instance.
14731 In 64-bit mode, we should probably support all 8-byte vectors,
14732 since we can in fact encode that into an immediate. */
14733 if (CONST_VECTOR_P (x))
14734 {
14735 if (x != CONST0_RTX (GET_MODE (x)))
14736 output_operand_lossage ("invalid vector immediate");
14737 x = const0_rtx;
14738 }
14739
14740 if (code == 'P')
14741 {
14742 if (ix86_force_load_from_GOT_p (x, call_p: true))
14743 {
14744 /* For inline assembly statement, load function address
14745 from GOT with 'P' operand modifier to avoid PLT. */
14746 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14747 (TARGET_64BIT
14748 ? UNSPEC_GOTPCREL
14749 : UNSPEC_GOT));
14750 x = gen_rtx_CONST (Pmode, x);
14751 x = gen_const_mem (Pmode, x);
14752 ix86_print_operand (file, x, code: 'A');
14753 return;
14754 }
14755 }
14756 else if (code != 'p')
14757 {
14758 if (CONST_INT_P (x))
14759 {
14760 if (ASSEMBLER_DIALECT == ASM_ATT)
14761 putc (c: '$', stream: file);
14762 }
14763 else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
14764 || LABEL_REF_P (x))
14765 {
14766 if (ASSEMBLER_DIALECT == ASM_ATT)
14767 putc (c: '$', stream: file);
14768 else
14769 fputs (s: "OFFSET FLAT:", stream: file);
14770 }
14771 }
14772 if (CONST_INT_P (x))
14773 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14774 else if (flag_pic || MACHOPIC_INDIRECT)
14775 output_pic_addr_const (file, x, code);
14776 else
14777 output_addr_const (file, x);
14778 }
14779}
14780
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true iff CODE
   is one of the punctuation characters that ix86_print_operand
   accepts without an operand ('*', '+', '&', ';', '~', '^', '!').  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '*':
    case '+':
    case '&':
    case ';':
    case '~':
    case '^':
    case '!':
      return true;

    default:
      return false;
    }
}
14787
/* Print a memory operand whose address is ADDR to FILE.  AS is the
   operand's address space (may carry an fs/gs segment override) and
   RAW suppresses both the segment prefix and the RIP-relative
   shorthand (used for the 'p'/'P' operand modifiers).  Handles AT&T
   and Intel dialects, VSIB addresses and x32 addr32 quirks.  */

static void
ix86_print_operand_address_as (FILE *file, rtx addr,
			       addr_space_t as, bool raw)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  /* A VSIB address is wrapped in an UNSPEC carrying the scalar base
     address, the vector index register and the scale.  */
  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts);
      /* Force 64-bit register names for the address of the LEA.  */
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, out: &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* The segment may come either from the operand's address space or
     from address decomposition, but not from both.  */
  if (ADDR_SPACE_GENERIC_P (as))
    as = parts.seg;
  else
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  if (!ADDR_SPACE_GENERIC_P (as) && !raw)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc (c: '%', stream: file);

      switch (as)
	{
	case ADDR_SPACE_SEG_FS:
	  fputs (s: "fs:", stream: file);
	  break;
	case ADDR_SPACE_SEG_GS:
	  fputs (s: "gs:", stream: file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index && !raw)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      /* TLS symbols need their dedicated relocations, not %rip.  */
      if (LABEL_REF_P (symbol)
	  || (SYMBOL_REF_P (symbol)
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
	    fputs (s: "ds:", stream: file);
	  fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	output_pic_addr_const (file, x: disp, code: 0);
      else if (flag_pic)
	output_pic_addr_const (file, x: disp, code: 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	{
	  if (flag_checking)
	    {
	      gcc_assert (TARGET_64BIT);
	      switch (GET_CODE (addr))
		{
		case SUBREG:
		  gcc_assert (GET_MODE (addr) == SImode);
		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
		  break;
		case ZERO_EXTEND:
		case AND:
		  gcc_assert (GET_MODE (addr) == DImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  gcc_assert (!code);
	  code = 'k';
	}
      else if (code == 0
	       && TARGET_X32
	       && disp
	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	{
	  /* X32 runs in 64-bit mode, where displacement, DISP, in
	     address DISP(%r64), is encoded as 32-bit immediate sign-
	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
	     which is invalid for x32.  The correct address is %r64
	     - 0x40000300 == 0xf7ffdd64.  To properly encode
	     -0x40000300(%r64) for x32, we zero-extend negative
	     displacement by forcing addr32 prefix which truncates
	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	     zero-extend all negative displacements, including -1(%rsp).
	     However, for small negative displacements, sign-extension
	     won't cause overflow.  We only zero-extend negative
	     displacements if they < -16*1024*1024, which is also used
	     to check legitimate address displacements for PIC.  */
	  code = 'k';
	}

      /* Since the upper 32 bits of RSP are always zero for x32,
	 we can encode %esp as %rsp to avoid 0x67 prefix if
	 there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
	code = 'q';

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, x: disp, code: 0);
	      else if (LABEL_REF_P (disp))
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc (c: '(', stream: file);
	  if (base)
	    print_reg (x: base, code, file);
	  if (index)
	    {
	      putc (c: ',', stream: file);
	      print_reg (x: index, code: vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (stream: file, format: ",%d", scale);
	    }
	  putc (c: ')', stream: file);
	}
      else
	{
	  /* Intel syntax: symbol followed by [base+offset+index*scale].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, x: disp, code: 0);
	      else if (LABEL_REF_P (disp))
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc (c: '[', stream: file);
	  if (base)
	    {
	      print_reg (x: base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc (c: '+', stream: file);
		  fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc (c: '0', stream: file);

	  if (index)
	    {
	      putc (c: '+', stream: file);
	      print_reg (x: index, code: vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (stream: file, format: "*%d", scale);
	    }
	  putc (c: ']', stream: file);
	}
    }
}
15019
15020static void
15021ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
15022{
15023 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
15024 output_operand_lossage ("invalid constraints for operand");
15025 else
15026 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, raw: false);
15027}
15028
15029/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15030
static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  /* Only TLS/GOT-style UNSPEC wrappers are handled here; anything
     else is left to the generic address printer.  */
  if (GET_CODE (x) != UNSPEC)
    return false;

  /* Print the wrapped symbol followed by the relocation suffix the
     assembler expects for this UNSPEC.  */
  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTOFF:
      output_addr_const (file, op);
      fputs (s: "@gotoff", stream: file);
      break;
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs (s: "@gottpoff", stream: file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs (s: "@tpoff", stream: file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      /* 64-bit uses @tpoff for the negated offset as well.  */
      if (TARGET_64BIT)
	fputs (s: "@tpoff", stream: file);
      else
	fputs (s: "@ntpoff", stream: file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs (s: "@dtpoff", stream: file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", stream: file);
      else
	fputs (s: "@gotntpoff", stream: file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs (s: "@indntpoff", stream: file);
      break;
    case UNSPEC_SECREL32:
      output_addr_const (file, op);
      fputs (s: "@secrel32", stream: file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    default:
      return false;
    }

  return true;
}
15096
15097
15098/* Output code to perform a 387 binary operation in INSN, one of PLUS,
15099 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15100 is the expression of the binary operation. The output may either be
15101 emitted here, or returned to the caller, like all output_* functions.
15102
15103 There is no guarantee that the operands are the same mode, as they
15104 might be within FLOAT or FLOAT_EXTEND expressions. */
15105
15106#ifndef SYSV386_COMPAT
15107/* Set to 1 for compatibility with brain-damaged assemblers. No-one
15108 wants to fix the assemblers because that causes incompatibility
15109 with gcc. No-one wants to fix gcc because that causes
15110 incompatibility with assemblers... You can use the option of
15111 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15112#define SYSV386_COMPAT 1
15113#endif
15114
const char *
output_387_binary_op (rtx_insn *insn, rtx *operands)
{
  /* Static buffer holding the returned template; the caller consumes
     it before the next call, but this is not reentrant.  */
  static char buf[40];
  const char *p;
  /* Emit an SSE instruction when any operand lives in an SSE reg.  */
  bool is_sse
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));

  /* Mnemonic prefix: "%v" (optional VEX) for SSE, "fi" for x87 with an
     integer memory operand, plain "f" otherwise.  */
  if (is_sse)
    p = "%v";
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
    p = "fi";
  else
    p = "f";

  strcpy (dest: buf, src: p);

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      p = "add"; break;
    case MINUS:
      p = "sub"; break;
    case MULT:
      p = "mul"; break;
    case DIV:
      p = "div"; break;
    default:
      gcc_unreachable ();
    }

  strcat (dest: buf, src: p);

  if (is_sse)
    {
      /* Scalar SSE: pick the ss/sd suffix and a 2- or 3-operand
	 template depending on AVX availability.  */
      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
      strcat (dest: buf, src: p);

      if (TARGET_AVX)
	p = "\t{%2, %1, %0|%0, %1, %2}";
      else
	p = "\t{%2, %0|%0, %2}";

      strcat (dest: buf, src: p);
      return buf;
    }

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (flag_checking)
    {
      if (STACK_REG_P (operands[0])
	  && ((REG_P (operands[1])
	       && REGNO (operands[0]) == REGNO (operands[1])
	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	      || (REG_P (operands[2])
		  && REGNO (operands[0]) == REGNO (operands[2])
		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
	; /* ok */
      else
	gcc_unreachable ();
    }

  /* x87 path: choose the operand/popping variant based on which
     operand is st(0), which dies, and whether one is in memory.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the destination matches
	 operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	std::swap (a&: operands[1], b&: operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: reversed forms (fsubr/fdivr) are needed when
	 the operand order doesn't match the hardware's.  */
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (dest: buf, src: p);
  return buf;
}
15304
15305/* Return needed mode for entity in optimize_mode_switching pass. */
15306
15307static int
15308ix86_dirflag_mode_needed (rtx_insn *insn)
15309{
15310 if (CALL_P (insn))
15311 {
15312 if (cfun->machine->func_type == TYPE_NORMAL)
15313 return X86_DIRFLAG_ANY;
15314 else
15315 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
15316 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
15317 }
15318
15319 if (recog_memoized (insn) < 0)
15320 return X86_DIRFLAG_ANY;
15321
15322 if (get_attr_type (insn) == TYPE_STR)
15323 {
15324 /* Emit cld instruction if stringops are used in the function. */
15325 if (cfun->machine->func_type == TYPE_NORMAL)
15326 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
15327 else
15328 return X86_DIRFLAG_RESET;
15329 }
15330
15331 return X86_DIRFLAG_ANY;
15332}
15333
15334/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
15335
15336static bool
15337ix86_check_avx_upper_register (const_rtx exp)
15338{
15339 /* construct_container may return a parallel with expr_list
15340 which contains the real reg and mode */
15341 subrtx_iterator::array_type array;
15342 FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
15343 {
15344 const_rtx x = *iter;
15345 if (SSE_REG_P (x)
15346 && !EXT_REX_SSE_REG_P (x)
15347 && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
15348 return true;
15349 }
15350
15351 return false;
15352}
15353
15354/* Check if a 256bit or 512bit AVX register is referenced in stores. */
15355
15356static void
15357ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
15358{
15359 if (SSE_REG_P (dest)
15360 && !EXT_REX_SSE_REG_P (dest)
15361 && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15362 {
15363 bool *used = (bool *) data;
15364 *used = true;
15365 }
15366}
15367
15368/* Return needed mode for entity in optimize_mode_switching pass. */
15369
static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  /* Debug insns never affect the AVX upper state.  */
  if (DEBUG_INSN_P (insn))
    return AVX_U128_ANY;

  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit or 512bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (exp: arg))
		return AVX_U128_DIRTY;
	    }
	}

      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
	 nor 512bit registers used in the function return register.  */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores,
		   &avx_upper_reg_found);
      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function is known to preserve some SSE registers,
	 RA and previous passes can legitimately rely on that for
	 modes wider than 256 bits.  It's only safe to issue a
	 vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
	  /* Should be safe to issue a vzeroupper before a sibling
	     call.  Also there is no mode exit for sibling calls, so a
	     vzeroupper could otherwise be missing for them.  */
	  || !(SIBLING_CALL_P (insn)
	       || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
					 y: abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);
      if (SSE_REG_P (dest)
	  && !EXT_REX_SSE_REG_P (dest)
	  && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
	{
	  /* This is an YMM/ZMM load.  Return AVX_U128_DIRTY if the
	     source isn't zero.  */
	  if (standard_sse_constant_p (x: src, GET_MODE (dest)) != 1)
	    return AVX_U128_DIRTY;
	  else
	    return AVX_U128_ANY;
	}
      else
	{
	  /* A read of an AVX upper register also forces DIRTY, see
	     the comment below.  */
	  if (ix86_check_avx_upper_register (exp: src))
	    return AVX_U128_DIRTY;
	}

      /* This isn't YMM/ZMM load/store.  */
      return AVX_U128_ANY;
    }

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving optimal insertion
     point above eventual read from 256bit or 512 bit register.  */
  if (ix86_check_avx_upper_register (exp: PATTERN (insn)))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
15455
15456/* Return mode that i387 must be switched into
15457 prior to the execution of insn. */
15458
15459static int
15460ix86_i387_mode_needed (int entity, rtx_insn *insn)
15461{
15462 enum attr_i387_cw mode;
15463
15464 /* The mode UNINITIALIZED is used to store control word after a
15465 function call or ASM pattern. The mode ANY specify that function
15466 has no requirements on the control word and make no changes in the
15467 bits we are interested in. */
15468
15469 if (CALL_P (insn)
15470 || (NONJUMP_INSN_P (insn)
15471 && (asm_noperands (PATTERN (insn)) >= 0
15472 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15473 return I387_CW_UNINITIALIZED;
15474
15475 if (recog_memoized (insn) < 0)
15476 return I387_CW_ANY;
15477
15478 mode = get_attr_i387_cw (insn);
15479
15480 switch (entity)
15481 {
15482 case I387_ROUNDEVEN:
15483 if (mode == I387_CW_ROUNDEVEN)
15484 return mode;
15485 break;
15486
15487 case I387_TRUNC:
15488 if (mode == I387_CW_TRUNC)
15489 return mode;
15490 break;
15491
15492 case I387_FLOOR:
15493 if (mode == I387_CW_FLOOR)
15494 return mode;
15495 break;
15496
15497 case I387_CEIL:
15498 if (mode == I387_CW_CEIL)
15499 return mode;
15500 break;
15501
15502 default:
15503 gcc_unreachable ();
15504 }
15505
15506 return I387_CW_ANY;
15507}
15508
15509/* Return mode that entity must be switched into
15510 prior to the execution of insn. */
15511
15512static int
15513ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15514{
15515 switch (entity)
15516 {
15517 case X86_DIRFLAG:
15518 return ix86_dirflag_mode_needed (insn);
15519 case AVX_U128:
15520 return ix86_avx_u128_mode_needed (insn);
15521 case I387_ROUNDEVEN:
15522 case I387_TRUNC:
15523 case I387_FLOOR:
15524 case I387_CEIL:
15525 return ix86_i387_mode_needed (entity, insn);
15526 default:
15527 gcc_unreachable ();
15528 }
15529 return 0;
15530}
15531
15532/* Calculate mode of upper 128bit AVX registers after the insn. */
15533
static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  /* vzeroupper/vzeroall explicitly clear the upper halves.  */
  if (vzeroupper_pattern (pat, VOIDmode)
      || vzeroall_pattern (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function doesn't clobber any SSE registers, or only
	 clobbers their 128-bit parts, then no vzeroupper is issued
	 before the function exits; the state is not CLEAN but ANY
	 after the call.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (!(SIBLING_CALL_P (insn)
	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
				      y: abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */
  return mode;
}
15570
15571/* Return the mode that an insn results in. */
15572
15573static int
15574ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15575{
15576 switch (entity)
15577 {
15578 case X86_DIRFLAG:
15579 return mode;
15580 case AVX_U128:
15581 return ix86_avx_u128_mode_after (mode, insn);
15582 case I387_ROUNDEVEN:
15583 case I387_TRUNC:
15584 case I387_FLOOR:
15585 case I387_CEIL:
15586 return mode;
15587 default:
15588 gcc_unreachable ();
15589 }
15590}
15591
15592static int
15593ix86_dirflag_mode_entry (void)
15594{
15595 /* For TARGET_CLD or in the interrupt handler we can't assume
15596 direction flag state at function entry. */
15597 if (TARGET_CLD
15598 || cfun->machine->func_type != TYPE_NORMAL)
15599 return X86_DIRFLAG_ANY;
15600
15601 return X86_DIRFLAG_RESET;
15602}
15603
15604static int
15605ix86_avx_u128_mode_entry (void)
15606{
15607 tree arg;
15608
15609 /* Entry mode is set to AVX_U128_DIRTY if there are
15610 256bit or 512bit modes used in function arguments. */
15611 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15612 arg = TREE_CHAIN (arg))
15613 {
15614 rtx incoming = DECL_INCOMING_RTL (arg);
15615
15616 if (incoming && ix86_check_avx_upper_register (exp: incoming))
15617 return AVX_U128_DIRTY;
15618 }
15619
15620 return AVX_U128_CLEAN;
15621}
15622
15623/* Return a mode that ENTITY is assumed to be
15624 switched to at function entry. */
15625
15626static int
15627ix86_mode_entry (int entity)
15628{
15629 switch (entity)
15630 {
15631 case X86_DIRFLAG:
15632 return ix86_dirflag_mode_entry ();
15633 case AVX_U128:
15634 return ix86_avx_u128_mode_entry ();
15635 case I387_ROUNDEVEN:
15636 case I387_TRUNC:
15637 case I387_FLOOR:
15638 case I387_CEIL:
15639 return I387_CW_ANY;
15640 default:
15641 gcc_unreachable ();
15642 }
15643}
15644
15645static int
15646ix86_avx_u128_mode_exit (void)
15647{
15648 rtx reg = crtl->return_rtx;
15649
15650 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15651 or 512 bit modes used in the function return register. */
15652 if (reg && ix86_check_avx_upper_register (exp: reg))
15653 return AVX_U128_DIRTY;
15654
15655 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15656 modes used in function arguments, otherwise return AVX_U128_CLEAN.
15657 */
15658 return ix86_avx_u128_mode_entry ();
15659}
15660
15661/* Return a mode that ENTITY is assumed to be
15662 switched to at function exit. */
15663
15664static int
15665ix86_mode_exit (int entity)
15666{
15667 switch (entity)
15668 {
15669 case X86_DIRFLAG:
15670 return X86_DIRFLAG_ANY;
15671 case AVX_U128:
15672 return ix86_avx_u128_mode_exit ();
15673 case I387_ROUNDEVEN:
15674 case I387_TRUNC:
15675 case I387_FLOOR:
15676 case I387_CEIL:
15677 return I387_CW_ANY;
15678 default:
15679 gcc_unreachable ();
15680 }
15681}
15682
/* Mode-switching priority hook: the priority of an entity's mode is
   simply its index N.  */
static int
ix86_mode_priority (int, int n)
{
  return n;
}
15688
/* Output code to initialize the control word copy used by trunc?f?i and
   rounding patterns.  MODE selects the rounding mode to prepare: the
   current control word is saved in SLOT_CW_STORED and an adjusted copy
   is stored in the stack slot dedicated to MODE.  */
15692
15693static void
15694emit_i387_cw_initialization (int mode)
15695{
15696 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15697 rtx new_mode;
15698
15699 enum ix86_stack_slot slot;
15700
15701 rtx reg = gen_reg_rtx (HImode);
15702
15703 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15704 emit_move_insn (reg, copy_rtx (stored_mode));
15705
15706 switch (mode)
15707 {
15708 case I387_CW_ROUNDEVEN:
15709 /* round to nearest */
15710 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15711 slot = SLOT_CW_ROUNDEVEN;
15712 break;
15713
15714 case I387_CW_TRUNC:
15715 /* round toward zero (truncate) */
15716 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15717 slot = SLOT_CW_TRUNC;
15718 break;
15719
15720 case I387_CW_FLOOR:
15721 /* round down toward -oo */
15722 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15723 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15724 slot = SLOT_CW_FLOOR;
15725 break;
15726
15727 case I387_CW_CEIL:
15728 /* round up toward +oo */
15729 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15730 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15731 slot = SLOT_CW_CEIL;
15732 break;
15733
15734 default:
15735 gcc_unreachable ();
15736 }
15737
15738 gcc_assert (slot < MAX_386_STACK_LOCALS);
15739
15740 new_mode = assign_386_stack_local (HImode, slot);
15741 emit_move_insn (new_mode, reg);
15742}
15743
15744/* Generate one or more insns to set ENTITY to MODE. */
15745
15746static void
15747ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
15748 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
15749{
15750 switch (entity)
15751 {
15752 case X86_DIRFLAG:
15753 if (mode == X86_DIRFLAG_RESET)
15754 emit_insn (gen_cld ());
15755 break;
15756 case AVX_U128:
15757 if (mode == AVX_U128_CLEAN)
15758 ix86_expand_avx_vzeroupper ();
15759 break;
15760 case I387_ROUNDEVEN:
15761 case I387_TRUNC:
15762 case I387_FLOOR:
15763 case I387_CEIL:
15764 if (mode != I387_CW_ANY
15765 && mode != I387_CW_UNINITIALIZED)
15766 emit_i387_cw_initialization (mode);
15767 break;
15768 default:
15769 gcc_unreachable ();
15770 }
15771}
15772
15773/* Output code for INSN to convert a float to a signed int. OPERANDS
15774 are the insn operands. The output may be [HSD]Imode and the input
15775 operand may be [SDX]Fmode. */
15776
15777const char *
15778output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15779{
15780 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15781 bool dimode_p = GET_MODE (operands[0]) == DImode;
15782 int round_mode = get_attr_i387_cw (insn);
15783
15784 static char buf[40];
15785 const char *p;
15786
15787 /* Jump through a hoop or two for DImode, since the hardware has no
15788 non-popping instruction. We used to do this a different way, but
15789 that was somewhat fragile and broke with post-reload splitters. */
15790 if ((dimode_p || fisttp) && !stack_top_dies)
15791 output_asm_insn ("fld\t%y1", operands);
15792
15793 gcc_assert (STACK_TOP_P (operands[1]));
15794 gcc_assert (MEM_P (operands[0]));
15795 gcc_assert (GET_MODE (operands[1]) != TFmode);
15796
15797 if (fisttp)
15798 return "fisttp%Z0\t%0";
15799
15800 strcpy (dest: buf, src: "fist");
15801
15802 if (round_mode != I387_CW_ANY)
15803 output_asm_insn ("fldcw\t%3", operands);
15804
15805 p = "p%Z0\t%0";
15806 strcat (dest: buf, src: p + !(stack_top_dies || dimode_p));
15807
15808 output_asm_insn (buf, operands);
15809
15810 if (round_mode != I387_CW_ANY)
15811 output_asm_insn ("fldcw\t%2", operands);
15812
15813 return "";
15814}
15815
15816/* Output code for x87 ffreep insn. The OPNO argument, which may only
15817 have the values zero or one, indicates the ffreep insn's operand
15818 from the OPERANDS array. */
15819
static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      /* The assembler does not understand the ffreep mnemonic; emit
	 the raw opcode bytes via ASM_SHORT instead.  */
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      /* Fold the stack-relative register number into the opcode.  */
      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  /* Without ffreep, fall back to a popping store.  */
  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
15842
15843
15844/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15845 should be used. UNORDERED_P is true when fucom should be used. */
15846
15847const char *
15848output_fp_compare (rtx_insn *insn, rtx *operands,
15849 bool eflags_p, bool unordered_p)
15850{
15851 rtx *xops = eflags_p ? &operands[0] : &operands[1];
15852 bool stack_top_dies;
15853
15854 static char buf[40];
15855 const char *p;
15856
15857 gcc_assert (STACK_TOP_P (xops[0]));
15858
15859 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15860
15861 if (eflags_p)
15862 {
15863 p = unordered_p ? "fucomi" : "fcomi";
15864 strcpy (dest: buf, src: p);
15865
15866 p = "p\t{%y1, %0|%0, %y1}";
15867 strcat (dest: buf, src: p + !stack_top_dies);
15868
15869 return buf;
15870 }
15871
15872 if (STACK_REG_P (xops[1])
15873 && stack_top_dies
15874 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
15875 {
15876 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
15877
15878 /* If both the top of the 387 stack die, and the other operand
15879 is also a stack register that dies, then this must be a
15880 `fcompp' float compare. */
15881 p = unordered_p ? "fucompp" : "fcompp";
15882 strcpy (dest: buf, src: p);
15883 }
15884 else if (const0_operand (xops[1], VOIDmode))
15885 {
15886 gcc_assert (!unordered_p);
15887 strcpy (dest: buf, src: "ftst");
15888 }
15889 else
15890 {
15891 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
15892 {
15893 gcc_assert (!unordered_p);
15894 p = "ficom";
15895 }
15896 else
15897 p = unordered_p ? "fucom" : "fcom";
15898
15899 strcpy (dest: buf, src: p);
15900
15901 p = "p%Z2\t%y2";
15902 strcat (dest: buf, src: p + !stack_top_dies);
15903 }
15904
15905 output_asm_insn (buf, operands);
15906 return "fnstsw\t%0";
15907}
15908
15909void
15910ix86_output_addr_vec_elt (FILE *file, int value)
15911{
15912 const char *directive = ASM_LONG;
15913
15914#ifdef ASM_QUAD
15915 if (TARGET_LP64)
15916 directive = ASM_QUAD;
15917#else
15918 gcc_assert (!TARGET_64BIT);
15919#endif
15920
15921 fprintf (stream: file, format: "%s%s%d\n", directive, LPREFIX, value);
15922}
15923
15924void
15925ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15926{
15927 const char *directive = ASM_LONG;
15928
15929#ifdef ASM_QUAD
15930 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15931 directive = ASM_QUAD;
15932#else
15933 gcc_assert (!TARGET_64BIT);
15934#endif
15935 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15936 if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
15937 fprintf (stream: file, format: "%s%s%d-%s%d\n",
15938 directive, LPREFIX, value, LPREFIX, rel);
15939#if TARGET_MACHO
15940 else if (TARGET_MACHO)
15941 {
15942 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15943 machopic_output_function_base_name (file);
15944 putc ('\n', file);
15945 }
15946#endif
15947 else if (HAVE_AS_GOTOFF_IN_DATA)
15948 fprintf (stream: file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15949 else
15950 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15951 GOT_SYMBOL_NAME, LPREFIX, value);
15952}
15953
15954#define LEA_MAX_STALL (3)
15955#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
15956
/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */
15961
15962static unsigned int
15963increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
15964{
15965 df_ref def, use;
15966
15967 if (!prev || !next)
15968 return distance + (distance & 1) + 2;
15969
15970 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15971 return distance + 1;
15972
15973 FOR_EACH_INSN_USE (use, next)
15974 FOR_EACH_INSN_DEF (def, prev)
15975 if (!DF_REF_IS_ARTIFICIAL (def)
15976 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
15977 return distance + (distance & 1) + 2;
15978
15979 return distance + 1;
15980}
15981
15982/* Function checks if instruction INSN defines register number
15983 REGNO1 or REGNO2. */
15984
15985bool
15986insn_defines_reg (unsigned int regno1, unsigned int regno2,
15987 rtx_insn *insn)
15988{
15989 df_ref def;
15990
15991 FOR_EACH_INSN_DEF (def, insn)
15992 if (DF_REF_REG_DEF_P (def)
15993 && !DF_REF_IS_ARTIFICIAL (def)
15994 && (regno1 == DF_REF_REGNO (def)
15995 || regno2 == DF_REF_REGNO (def)))
15996 return true;
15997
15998 return false;
15999}
16000
16001/* Function checks if instruction INSN uses register number
16002 REGNO as a part of address expression. */
16003
16004static bool
16005insn_uses_reg_mem (unsigned int regno, rtx insn)
16006{
16007 df_ref use;
16008
16009 FOR_EACH_INSN_USE (use, insn)
16010 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
16011 return true;
16012
16013 return false;
16014}
16015
16016/* Search backward for non-agu definition of register number REGNO1
16017 or register number REGNO2 in basic block starting from instruction
16018 START up to head of basic block or instruction INSN.
16019
16020 Function puts true value into *FOUND var if definition was found
16021 and false otherwise.
16022
16023 Distance in half-cycles between START and found instruction or head
16024 of BB is added to DISTANCE and returned. */
16025
16026static int
16027distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16028 rtx_insn *insn, int distance,
16029 rtx_insn *start, bool *found)
16030{
16031 basic_block bb = start ? BLOCK_FOR_INSN (insn: start) : NULL;
16032 rtx_insn *prev = start;
16033 rtx_insn *next = NULL;
16034
16035 *found = false;
16036
16037 while (prev
16038 && prev != insn
16039 && distance < LEA_SEARCH_THRESHOLD)
16040 {
16041 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16042 {
16043 distance = increase_distance (prev, next, distance);
16044 if (insn_defines_reg (regno1, regno2, insn: prev))
16045 {
16046 if (recog_memoized (insn: prev) < 0
16047 || get_attr_type (prev) != TYPE_LEA)
16048 {
16049 *found = true;
16050 return distance;
16051 }
16052 }
16053
16054 next = prev;
16055 }
16056 if (prev == BB_HEAD (bb))
16057 break;
16058
16059 prev = PREV_INSN (insn: prev);
16060 }
16061
16062 return distance;
16063}
16064
16065/* Search backward for non-agu definition of register number REGNO1
16066 or register number REGNO2 in INSN's basic block until
16067 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16068 2. Reach neighbor BBs boundary, or
16069 3. Reach agu definition.
16070 Returns the distance between the non-agu definition point and INSN.
16071 If no definition point, returns -1. */
16072
16073static int
16074distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16075 rtx_insn *insn)
16076{
16077 basic_block bb = BLOCK_FOR_INSN (insn);
16078 int distance = 0;
16079 bool found = false;
16080
16081 if (insn != BB_HEAD (bb))
16082 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16083 distance, start: PREV_INSN (insn),
16084 found: &found);
16085
16086 if (!found && distance < LEA_SEARCH_THRESHOLD)
16087 {
16088 edge e;
16089 edge_iterator ei;
16090 bool simple_loop = false;
16091
16092 FOR_EACH_EDGE (e, ei, bb->preds)
16093 if (e->src == bb)
16094 {
16095 simple_loop = true;
16096 break;
16097 }
16098
16099 if (simple_loop)
16100 distance = distance_non_agu_define_in_bb (regno1, regno2,
16101 insn, distance,
16102 BB_END (bb), found: &found);
16103 else
16104 {
16105 int shortest_dist = -1;
16106 bool found_in_bb = false;
16107
16108 FOR_EACH_EDGE (e, ei, bb->preds)
16109 {
16110 int bb_dist
16111 = distance_non_agu_define_in_bb (regno1, regno2,
16112 insn, distance,
16113 BB_END (e->src),
16114 found: &found_in_bb);
16115 if (found_in_bb)
16116 {
16117 if (shortest_dist < 0)
16118 shortest_dist = bb_dist;
16119 else if (bb_dist > 0)
16120 shortest_dist = MIN (bb_dist, shortest_dist);
16121
16122 found = true;
16123 }
16124 }
16125
16126 distance = shortest_dist;
16127 }
16128 }
16129
16130 if (!found)
16131 return -1;
16132
16133 return distance >> 1;
16134}
16135
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in a memory address, added
   to DISTANCE.  Return -1 if REGNO is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */
16144
16145static int
16146distance_agu_use_in_bb (unsigned int regno,
16147 rtx_insn *insn, int distance, rtx_insn *start,
16148 bool *found, bool *redefined)
16149{
16150 basic_block bb = NULL;
16151 rtx_insn *next = start;
16152 rtx_insn *prev = NULL;
16153
16154 *found = false;
16155 *redefined = false;
16156
16157 if (start != NULL_RTX)
16158 {
16159 bb = BLOCK_FOR_INSN (insn: start);
16160 if (start != BB_HEAD (bb))
16161 /* If insn and start belong to the same bb, set prev to insn,
16162 so the call to increase_distance will increase the distance
16163 between insns by 1. */
16164 prev = insn;
16165 }
16166
16167 while (next
16168 && next != insn
16169 && distance < LEA_SEARCH_THRESHOLD)
16170 {
16171 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16172 {
16173 distance = increase_distance(prev, next, distance);
16174 if (insn_uses_reg_mem (regno, insn: next))
16175 {
16176 /* Return DISTANCE if OP0 is used in memory
16177 address in NEXT. */
16178 *found = true;
16179 return distance;
16180 }
16181
16182 if (insn_defines_reg (regno1: regno, INVALID_REGNUM, insn: next))
16183 {
16184 /* Return -1 if OP0 is set in NEXT. */
16185 *redefined = true;
16186 return -1;
16187 }
16188
16189 prev = next;
16190 }
16191
16192 if (next == BB_END (bb))
16193 break;
16194
16195 next = NEXT_INSN (insn: next);
16196 }
16197
16198 return distance;
16199}
16200
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD, or if REGNO0 is set.  */
16204
16205static int
16206distance_agu_use (unsigned int regno0, rtx_insn *insn)
16207{
16208 basic_block bb = BLOCK_FOR_INSN (insn);
16209 int distance = 0;
16210 bool found = false;
16211 bool redefined = false;
16212
16213 if (insn != BB_END (bb))
16214 distance = distance_agu_use_in_bb (regno: regno0, insn, distance,
16215 start: NEXT_INSN (insn),
16216 found: &found, redefined: &redefined);
16217
16218 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16219 {
16220 edge e;
16221 edge_iterator ei;
16222 bool simple_loop = false;
16223
16224 FOR_EACH_EDGE (e, ei, bb->succs)
16225 if (e->dest == bb)
16226 {
16227 simple_loop = true;
16228 break;
16229 }
16230
16231 if (simple_loop)
16232 distance = distance_agu_use_in_bb (regno: regno0, insn,
16233 distance, BB_HEAD (bb),
16234 found: &found, redefined: &redefined);
16235 else
16236 {
16237 int shortest_dist = -1;
16238 bool found_in_bb = false;
16239 bool redefined_in_bb = false;
16240
16241 FOR_EACH_EDGE (e, ei, bb->succs)
16242 {
16243 int bb_dist
16244 = distance_agu_use_in_bb (regno: regno0, insn,
16245 distance, BB_HEAD (e->dest),
16246 found: &found_in_bb, redefined: &redefined_in_bb);
16247 if (found_in_bb)
16248 {
16249 if (shortest_dist < 0)
16250 shortest_dist = bb_dist;
16251 else if (bb_dist > 0)
16252 shortest_dist = MIN (bb_dist, shortest_dist);
16253
16254 found = true;
16255 }
16256 }
16257
16258 distance = shortest_dist;
16259 }
16260 }
16261
16262 if (!found || redefined)
16263 return -1;
16264
16265 return distance >> 1;
16266}
16267
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma between choosing LEA or ADD:
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
16273#define IX86_LEA_PRIORITY 0
16274
16275/* Return true if usage of lea INSN has performance advantage
16276 over a sequence of instructions. Instructions sequence has
16277 SPLIT_COST cycles higher latency than lea latency. */
16278
16279static bool
16280ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
16281 unsigned int regno2, int split_cost, bool has_scale)
16282{
16283 int dist_define, dist_use;
16284
16285 /* For Atom processors newer than Bonnell, if using a 2-source or
16286 3-source LEA for non-destructive destination purposes, or due to
16287 wanting ability to use SCALE, the use of LEA is justified. */
16288 if (!TARGET_CPU_P (BONNELL))
16289 {
16290 if (has_scale)
16291 return true;
16292 if (split_cost < 1)
16293 return false;
16294 if (regno0 == regno1 || regno0 == regno2)
16295 return false;
16296 return true;
16297 }
16298
16299 /* Remember recog_data content. */
16300 struct recog_data_d recog_data_save = recog_data;
16301
16302 dist_define = distance_non_agu_define (regno1, regno2, insn);
16303 dist_use = distance_agu_use (regno0, insn);
16304
16305 /* distance_non_agu_define can call get_attr_type which can call
16306 recog_memoized, restore recog_data back to previous content. */
16307 recog_data = recog_data_save;
16308
16309 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16310 {
16311 /* If there is no non AGU operand definition, no AGU
16312 operand usage and split cost is 0 then both lea
16313 and non lea variants have same priority. Currently
16314 we prefer lea for 64 bit code and non lea on 32 bit
16315 code. */
16316 if (dist_use < 0 && split_cost == 0)
16317 return TARGET_64BIT || IX86_LEA_PRIORITY;
16318 else
16319 return true;
16320 }
16321
16322 /* With longer definitions distance lea is more preferable.
16323 Here we change it to take into account splitting cost and
16324 lea priority. */
16325 dist_define += split_cost + IX86_LEA_PRIORITY;
16326
16327 /* If there is no use in memory addess then we just check
16328 that split cost exceeds AGU stall. */
16329 if (dist_use < 0)
16330 return dist_define > LEA_MAX_STALL;
16331
16332 /* If this insn has both backward non-agu dependence and forward
16333 agu dependence, the one with short distance takes effect. */
16334 return dist_define >= dist_use;
16335}
16336
16337/* Return true if we need to split op0 = op1 + op2 into a sequence of
16338 move and add to avoid AGU stalls. */
16339
16340bool
16341ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
16342{
16343 unsigned int regno0, regno1, regno2;
16344
16345 /* Check if we need to optimize. */
16346 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16347 return false;
16348
16349 regno0 = true_regnum (operands[0]);
16350 regno1 = true_regnum (operands[1]);
16351 regno2 = true_regnum (operands[2]);
16352
16353 /* We need to split only adds with non destructive
16354 destination operand. */
16355 if (regno0 == regno1 || regno0 == regno2)
16356 return false;
16357 else
16358 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 1, has_scale: false);
16359}
16360
16361/* Return true if we should emit lea instruction instead of mov
16362 instruction. */
16363
16364bool
16365ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
16366{
16367 unsigned int regno0, regno1;
16368
16369 /* Check if we need to optimize. */
16370 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16371 return false;
16372
16373 /* Use lea for reg to reg moves only. */
16374 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16375 return false;
16376
16377 regno0 = true_regnum (operands[0]);
16378 regno1 = true_regnum (operands[1]);
16379
16380 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, split_cost: 0, has_scale: false);
16381}
16382
16383/* Return true if we need to split lea into a sequence of
16384 instructions to avoid AGU stalls during peephole2. */
16385
16386bool
16387ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
16388{
16389 unsigned int regno0, regno1, regno2;
16390 int split_cost;
16391 struct ix86_address parts;
16392 int ok;
16393
16394 /* The "at least two components" test below might not catch simple
16395 move or zero extension insns if parts.base is non-NULL and parts.disp
16396 is const0_rtx as the only components in the address, e.g. if the
16397 register is %rbp or %r13. As this test is much cheaper and moves or
16398 zero extensions are the common case, do this check first. */
16399 if (REG_P (operands[1])
16400 || (SImode_address_operand (operands[1], VOIDmode)
16401 && REG_P (XEXP (operands[1], 0))))
16402 return false;
16403
16404 ok = ix86_decompose_address (addr: operands[1], out: &parts);
16405 gcc_assert (ok);
16406
16407 /* There should be at least two components in the address. */
16408 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16409 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
16410 return false;
16411
16412 /* We should not split into add if non legitimate pic
16413 operand is used as displacement. */
16414 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16415 return false;
16416
16417 regno0 = true_regnum (operands[0]) ;
16418 regno1 = INVALID_REGNUM;
16419 regno2 = INVALID_REGNUM;
16420
16421 if (parts.base)
16422 regno1 = true_regnum (parts.base);
16423 if (parts.index)
16424 regno2 = true_regnum (parts.index);
16425
16426 /* Use add for a = a + b and a = b + a since it is faster and shorter
16427 than lea for most processors. For the processors like BONNELL, if
16428 the destination register of LEA holds an actual address which will
16429 be used soon, LEA is better and otherwise ADD is better. */
16430 if (!TARGET_CPU_P (BONNELL)
16431 && parts.scale == 1
16432 && (!parts.disp || parts.disp == const0_rtx)
16433 && (regno0 == regno1 || regno0 == regno2))
16434 return true;
16435
16436 /* Split with -Oz if the encoding requires fewer bytes. */
16437 if (optimize_size > 1
16438 && parts.scale > 1
16439 && !parts.base
16440 && (!parts.disp || parts.disp == const0_rtx))
16441 return true;
16442
16443 /* Check we need to optimize. */
16444 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
16445 return false;
16446
16447 split_cost = 0;
16448
16449 /* Compute how many cycles we will add to execution time
16450 if split lea into a sequence of instructions. */
16451 if (parts.base || parts.index)
16452 {
16453 /* Have to use mov instruction if non desctructive
16454 destination form is used. */
16455 if (regno1 != regno0 && regno2 != regno0)
16456 split_cost += 1;
16457
16458 /* Have to add index to base if both exist. */
16459 if (parts.base && parts.index)
16460 split_cost += 1;
16461
16462 /* Have to use shift and adds if scale is 2 or greater. */
16463 if (parts.scale > 1)
16464 {
16465 if (regno0 != regno1)
16466 split_cost += 1;
16467 else if (regno2 == regno0)
16468 split_cost += 4;
16469 else
16470 split_cost += parts.scale;
16471 }
16472
16473 /* Have to use add instruction with immediate if
16474 disp is non zero. */
16475 if (parts.disp && parts.disp != const0_rtx)
16476 split_cost += 1;
16477
16478 /* Subtract the price of lea. */
16479 split_cost -= 1;
16480 }
16481
16482 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
16483 has_scale: parts.scale > 1);
16484}
16485
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like BONNELL, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */
16491
16492bool
16493ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16494{
16495 unsigned int regno0 = true_regnum (operands[0]);
16496 unsigned int regno1 = true_regnum (operands[1]);
16497 unsigned int regno2 = true_regnum (operands[2]);
16498
16499 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16500 if (regno0 != regno1 && regno0 != regno2)
16501 return true;
16502
16503 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16504 return false;
16505
16506 return ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 0, has_scale: false);
16507}
16508
16509/* Return true if destination reg of SET_BODY is shift count of
16510 USE_BODY. */
16511
static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      /* Recurse over the PARALLEL's elements; any matching pair wins.
	 If none matches, fall through to return false.  */
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      /* The shift expression is the SET_SRC of USE_BODY.  */
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      /* Same recursion as above, on the use side.  */
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Only shift and rotate codes carry a shift count operand.  */
  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}
16577
16578/* Return true if destination reg of SET_INSN is shift count of
16579 USE_INSN. */
16580
16581bool
16582ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16583{
16584 return ix86_dep_by_shift_count_body (set_body: PATTERN (insn: set_insn),
16585 use_body: PATTERN (insn: use_insn));
16586}
16587
16588/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16589 are ok, keeping in mind the possible movddup alternative. */
16590
16591bool
16592ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16593{
16594 if (MEM_P (operands[0]))
16595 return rtx_equal_p (operands[0], operands[1 + high]);
16596 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16597 return false;
16598 return true;
16599}
16600
/* A subroutine of ix86_build_signbit_mask.  Build a CONST_VECTOR of
   mode MODE whose low element is VALUE.  If VECT is true,
   then replicate the value for all elements of the vector
   register; otherwise the remaining elements are zero.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    /* Integer vector modes are only supported when replicating; a
       low-element-only integer constant is never requested here.  */
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V2SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      /* The low element always receives VALUE.  */
      RTVEC_ELT (v, 0) = value;

      /* Remaining elements: VALUE again when replicating, else zero.  */
      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
16661
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  /* Pick the integer mode IMODE matching the element width, and record
     the vector mode (VOIDmode for the scalar TImode/TFmode case).  */
  switch (mode)
    {
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      vec_mode = mode;
      imode = HImode;
      break;

    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V2SImode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_TImode:
    case E_TFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  /* Build a constant with only the element's sign bit set (or with
     everything except the sign bit when INVERT).  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (x: w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  /* Scalar TImode/TFmode masks are used directly, not as vectors.  */
  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (mode: vec_mode, vect, value: mask);
  return force_reg (vec_mode, v);
}
16739
/* Return HOST_WIDE_INT for const vector OP in MODE, concatenating the
   target-format bits of each element from low to high.  Only valid for
   vectors no wider than a word.  */

HOST_WIDE_INT
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    gcc_unreachable ();

  int nunits = GET_MODE_NUNITS (mode);
  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
  machine_mode innermode = GET_MODE_INNER (mode);
  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);

  switch (mode)
    {
    /* Integer elements: insert each CONST_INT at its bit position.  */
    case E_V2QImode:
    case E_V4QImode:
    case E_V2HImode:
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      for (int i = 0; i < nunits; ++i)
	{
	  int v = INTVAL (XVECEXP (op, 0, i));
	  wide_int wv = wi::shwi (val: v, precision: innermode_bits);
	  val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
	}
      break;
    /* Single-element vectors: the element already is the answer.  */
    case E_V1SImode:
    case E_V1DImode:
      op = CONST_VECTOR_ELT (op, 0);
      return INTVAL (op);
    /* FP elements: convert each to its target bit pattern first.  */
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SFmode:
      for (int i = 0; i < nunits; ++i)
	{
	  rtx x = XVECEXP (op, 0, i);
	  int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
				  REAL_MODE_FORMAT (innermode));
	  wide_int wv = wi::shwi (val: v, precision: innermode_bits);
	  val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
	}
      break;
    default:
      gcc_unreachable ();
    }

  return val.to_shwi ();
}
16792
16793int ix86_get_flags_cc (rtx_code code)
16794{
16795 switch (code)
16796 {
16797 case NE: return X86_CCNE;
16798 case EQ: return X86_CCE;
16799 case GE: return X86_CCNL;
16800 case GT: return X86_CCNLE;
16801 case LE: return X86_CCLE;
16802 case LT: return X86_CCL;
16803 case GEU: return X86_CCNB;
16804 case GTU: return X86_CCNBE;
16805 case LEU: return X86_CCBE;
16806 case LTU: return X86_CCB;
16807 default: return -1;
16808 }
16809}
16810
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  /* For a PARALLEL the flag-setting COMPARE is the first SET.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    /* CCNO also satisfies a plain CC request, but only when comparing
       against zero.  */
    case E_CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    /* FALLTHRU chain: each case rejects the REQ_MODEs whose flag
       guarantees this SET mode does not provide, then inherits the
       rejections of the less constrained modes below it.  */
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    /* The special-purpose CC modes only match themselves.  */
    case E_CCGZmode:

    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  /* Finally the COMPARE itself must carry the same CC mode.  */
  return GET_MODE (SET_SRC (set)) == set_mode;
}
16868
/* Return the CC mode that should be used when comparing OP0 and OP1
   with comparison code CODE, choosing the least constrained mode that
   still provides every flag the comparison needs.  */

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  /* All supported FP comparisons use the FP flags mode.  */
  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      rtx geu;
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
	 Match LTU of op0
	 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
	 and op1
	 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
	 where CC_CCC is either CC or CCC.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (geu = XEXP (op0, 0)) == GEU
	       && REG_P (XEXP (geu, 0))
	       && (GET_MODE (XEXP (geu, 0)) == CCCmode
		   || GET_MODE (XEXP (geu, 0)) == CCmode)
	       && REGNO (XEXP (geu, 0)) == FLAGS_REG
	       && XEXP (geu, 1) == const0_rtx
	       && GET_CODE (op1) == LTU
	       && REG_P (XEXP (op1, 0))
	       && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	       && REGNO (XEXP (op1, 0)) == FLAGS_REG
	       && XEXP (op1, 1) == const0_rtx)
	return CCCmode;
      /* Similarly for *x86_cmc pattern.
	 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
	 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
	 It is sufficient to test that the operand modes are CCCmode.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (XEXP (op0, 0)) == LTU
	       && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
	       && GET_CODE (op1) == GEU
	       && GET_MODE (XEXP (op1, 0)) == CCCmode)
	return CCCmode;
      /* Similarly for the comparison of addcarry/subborrow pattern.  */
      else if (code == LTU
	       && GET_CODE (op0) == ZERO_EXTEND
	       && GET_CODE (op1) == PLUS
	       && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
	       && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      /* CCmode should be used in all other cases.  */
      return CCmode;
    }
}
16962
16963/* Return TRUE or FALSE depending on whether the ptest instruction
16964 INSN has source and destination with suitable matching CC modes. */
16965
16966bool
16967ix86_match_ptest_ccmode (rtx insn)
16968{
16969 rtx set, src;
16970 machine_mode set_mode;
16971
16972 set = PATTERN (insn);
16973 gcc_assert (GET_CODE (set) == SET);
16974 src = SET_SRC (set);
16975 gcc_assert (GET_CODE (src) == UNSPEC
16976 && XINT (src, 1) == UNSPEC_PTEST);
16977
16978 set_mode = GET_MODE (src);
16979 if (set_mode != CCZmode
16980 && set_mode != CCCmode
16981 && set_mode != CCmode)
16982 return false;
16983 return GET_MODE (SET_DEST (set)) == set_mode;
16984}
16985
16986/* Return the fixed registers used for condition codes. */
16987
16988static bool
16989ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16990{
16991 *p1 = FLAGS_REG;
16992 *p2 = INVALID_REGNUM;
16993 return true;
16994}
16995
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  /* Identical modes are trivially compatible.  */
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGOC pairs with CCGC or CCNO; the stricter partner covers both.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  /* CCZ is subsumed by CCGC, CCGOC and CCNO, so the other mode wins.  */
  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    /* Any other pair of distinct integer CC modes collapses to the
       fully general CCmode.  */
    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGOCmode:
	case E_CCNOmode:
	case E_CCAmode:
	case E_CCCmode:
	case E_CCOmode:
	case E_CCPmode:
	case E_CCSmode:
	case E_CCZmode:
	  return CCmode;
	}

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
17063
17064/* Return strategy to use for floating-point. We assume that fcomi is always
17065 preferrable where available, since that is also true when looking at size
17066 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17067
17068enum ix86_fpcmp_strategy
17069ix86_fp_comparison_strategy (enum rtx_code)
17070{
17071 /* Do fcomi/sahf based test when profitable. */
17072
17073 if (TARGET_CMOVE)
17074 return IX86_FPCMP_COMI;
17075
17076 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
17077 return IX86_FPCMP_SAHF;
17078
17079 return IX86_FPCMP_ARITH;
17080}
17081
17082/* Convert comparison codes we use to represent FP comparison to integer
17083 code that will result in proper branch. Return UNKNOWN if no such code
17084 is available. */
17085
17086enum rtx_code
17087ix86_fp_compare_code_to_integer (enum rtx_code code)
17088{
17089 switch (code)
17090 {
17091 case GT:
17092 return GTU;
17093 case GE:
17094 return GEU;
17095 case ORDERED:
17096 case UNORDERED:
17097 return code;
17098 case UNEQ:
17099 return EQ;
17100 case UNLT:
17101 return LTU;
17102 case UNLE:
17103 return LEU;
17104 case LTGT:
17105 return NE;
17106 case EQ:
17107 case NE:
17108 if (TARGET_AVX10_2)
17109 return code;
17110 /* FALLTHRU. */
17111 default:
17112 return UNKNOWN;
17113 }
17114}
17115
17116/* Zero extend possibly SImode EXP to Pmode register. */
17117rtx
17118ix86_zero_extend_to_Pmode (rtx exp)
17119{
17120 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
17121}
17122
17123/* Return true if the function is called via PLT. */
17124
17125bool
17126ix86_call_use_plt_p (rtx call_op)
17127{
17128 if (SYMBOL_REF_LOCAL_P (call_op))
17129 {
17130 if (SYMBOL_REF_DECL (call_op)
17131 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
17132 {
17133 /* NB: All ifunc functions must be called via PLT. */
17134 cgraph_node *node
17135 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
17136 if (node && node->ifunc_resolver)
17137 return true;
17138 }
17139 return false;
17140 }
17141 return true;
17142}
17143
17144/* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
17145 the PLT entry will be used as the function address for local IFUNC
17146 functions. When the PIC register is needed for PLT call, indirect
17147 call via the PLT entry will fail since the PIC register may not be
17148 set up properly for indirect call. In this case, we should return
17149 false. */
17150
17151static bool
17152ix86_ifunc_ref_local_ok (void)
17153{
17154 return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
17155}
17156
17157/* Return true if the function being called was marked with attribute
17158 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
17159 to handle the non-PIC case in the backend because there is no easy
17160 interface for the front-end to force non-PLT calls to use the GOT.
17161 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
17162 to call the function marked "noplt" indirectly. */
17163
17164bool
17165ix86_nopic_noplt_attribute_p (rtx call_op)
17166{
17167 if (flag_pic || ix86_cmodel == CM_LARGE
17168 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
17169 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
17170 || SYMBOL_REF_LOCAL_P (call_op))
17171 return false;
17172
17173 tree symbol_decl = SYMBOL_REF_DECL (call_op);
17174
17175 if (!flag_plt
17176 || (symbol_decl != NULL_TREE
17177 && lookup_attribute (attr_name: "noplt", DECL_ATTRIBUTES (symbol_decl))))
17178 return true;
17179
17180 return false;
17181}
17182
17183/* Helper to output the jmp/call. */
17184static void
17185ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
17186{
17187 if (thunk_name != NULL)
17188 {
17189 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
17190 && ix86_indirect_branch_cs_prefix)
17191 fprintf (stream: asm_out_file, format: "\tcs\n");
17192 fprintf (stream: asm_out_file, format: "\tjmp\t");
17193 assemble_name (asm_out_file, thunk_name);
17194 putc (c: '\n', stream: asm_out_file);
17195 if ((ix86_harden_sls & harden_sls_indirect_jmp))
17196 fputs (s: "\tint3\n", stream: asm_out_file);
17197 }
17198 else
17199 output_indirect_thunk (regno);
17200}
17201
17202/* Output indirect branch via a call and return thunk. CALL_OP is a
17203 register which contains the branch target. XASM is the assembly
17204 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
17205 A normal call is converted to:
17206
17207 call __x86_indirect_thunk_reg
17208
17209 and a tail call is converted to:
17210
17211 jmp __x86_indirect_thunk_reg
17212 */
17213
17214static void
17215ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
17216{
17217 char thunk_name_buf[32];
17218 char *thunk_name;
17219 enum indirect_thunk_prefix need_prefix
17220 = indirect_thunk_need_prefix (insn: current_output_insn);
17221 int regno = REGNO (call_op);
17222
17223 if (cfun->machine->indirect_branch_type
17224 != indirect_branch_thunk_inline)
17225 {
17226 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17227 SET_HARD_REG_BIT (set&: indirect_thunks_used, bit: regno);
17228
17229 indirect_thunk_name (name: thunk_name_buf, regno, need_prefix, ret_p: false);
17230 thunk_name = thunk_name_buf;
17231 }
17232 else
17233 thunk_name = NULL;
17234
17235 if (sibcall_p)
17236 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17237 else
17238 {
17239 if (thunk_name != NULL)
17240 {
17241 if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
17242 && ix86_indirect_branch_cs_prefix)
17243 fprintf (stream: asm_out_file, format: "\tcs\n");
17244 fprintf (stream: asm_out_file, format: "\tcall\t");
17245 assemble_name (asm_out_file, thunk_name);
17246 putc (c: '\n', stream: asm_out_file);
17247 return;
17248 }
17249
17250 char indirectlabel1[32];
17251 char indirectlabel2[32];
17252
17253 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17254 INDIRECT_LABEL,
17255 indirectlabelno++);
17256 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17257 INDIRECT_LABEL,
17258 indirectlabelno++);
17259
17260 /* Jump. */
17261 fputs (s: "\tjmp\t", stream: asm_out_file);
17262 assemble_name_raw (asm_out_file, indirectlabel2);
17263 fputc (c: '\n', stream: asm_out_file);
17264
17265 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17266
17267 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17268
17269 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17270
17271 /* Call. */
17272 fputs (s: "\tcall\t", stream: asm_out_file);
17273 assemble_name_raw (asm_out_file, indirectlabel1);
17274 fputc (c: '\n', stream: asm_out_file);
17275 }
17276}
17277
/* Output indirect branch via a call and return thunk.  CALL_OP is
   the branch target.  XASM is the assembly template for CALL_OP.
   Branch is a tail call if SIBCALL_P is true.  A normal call is
   converted to:

	jmp L2
   L1:
	push CALL_OP
	jmp __x86_indirect_thunk
   L2:
	call L1

   and a tail call is converted to:

	push CALL_OP
	jmp __x86_indirect_thunk
 */

static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
				      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (insn: current_output_insn);
  int regno = -1;

  /* Use the shared (register-less) thunk unless inline was asked for.  */
  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	indirect_thunk_needed = true;
      indirect_thunk_name (name: thunk_name_buf, regno, need_prefix, ret_p: false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  /* Build the "push target" insn text; the thunk branches to the
     value left on top of the stack.  */
  snprintf (s: push_buf, maxlen: sizeof (push_buf), format: "push{%c}\t%s",
	    TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs (s: "\tjmp\t", stream: asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc (c: '\n', stream: asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
	{
	  struct ix86_address parts;
	  rtx addr = XEXP (call_op, 0);
	  if (ix86_decompose_address (addr, out: &parts)
	      && parts.base == stack_pointer_rtx)
	    {
	      /* Since call will adjust stack by -UNITS_PER_WORD,
		 we must convert "disp(stack, index, scale)" to
		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
	      if (parts.index)
		{
		  addr = gen_rtx_MULT (Pmode, parts.index,
				       GEN_INT (parts.scale));
		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				       addr);
		}
	      else
		addr = stack_pointer_rtx;

	      rtx disp;
	      if (parts.disp != NULL_RTX)
		disp = plus_constant (Pmode, parts.disp,
				      UNITS_PER_WORD);
	      else
		disp = GEN_INT (UNITS_PER_WORD);

	      addr = gen_rtx_PLUS (Pmode, addr, disp);
	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
	    }
	}

      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs (s: "\tcall\t", stream: asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc (c: '\n', stream: asm_out_file);
    }
}
17390
17391/* Output indirect branch via a call and return thunk. CALL_OP is
17392 the branch target. XASM is the assembly template for CALL_OP.
17393 Branch is a tail call if SIBCALL_P is true. */
17394
17395static void
17396ix86_output_indirect_branch (rtx call_op, const char *xasm,
17397 bool sibcall_p)
17398{
17399 if (REG_P (call_op))
17400 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
17401 else
17402 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
17403}
17404
17405/* Output indirect jump. CALL_OP is the jump target. */
17406
17407const char *
17408ix86_output_indirect_jmp (rtx call_op)
17409{
17410 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
17411 {
17412 /* We can't have red-zone since "call" in the indirect thunk
17413 pushes the return address onto stack, destroying red-zone. */
17414 if (ix86_red_zone_used)
17415 gcc_unreachable ();
17416
17417 ix86_output_indirect_branch (call_op, xasm: "%0", sibcall_p: true);
17418 }
17419 else
17420 output_asm_insn ("%!jmp\t%A0", &call_op);
17421 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
17422}
17423
/* Output return instrumentation for current function if needed.
   Emits a __return__ call or a 5-byte nop per -minstrument-return,
   and optionally records the location in the __return_loc section.  */

static void
output_return_instrumentation (void)
{
  /* Only instrument when requested, with -mfentry, and when the
     function is not excluded from entry/exit instrumentation.  */
  if (ix86_instrument_return != instrument_return_none
      && flag_fentry
      && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
    {
      /* Local label "1:" marks the instrumentation site for recording.  */
      if (ix86_flag_record_return)
	fprintf (stream: asm_out_file, format: "1:\n");
      switch (ix86_instrument_return)
	{
	case instrument_return_call:
	  fprintf (stream: asm_out_file, format: "\tcall\t__return__\n");
	  break;
	case instrument_return_nop5:
	  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
	  fprintf (stream: asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
	  break;
	case instrument_return_none:
	  break;
	}

      /* Record the address of label 1 in the __return_loc section.  */
      if (ix86_flag_record_return)
	{
	  fprintf (stream: asm_out_file, format: "\t.section __return_loc, \"a\",@progbits\n");
	  fprintf (stream: asm_out_file, format: "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
	  fprintf (stream: asm_out_file, format: "\t.previous\n");
	}
    }
}
17456
/* Output function return.  CALL_OP is the jump target.  Add a REP
   prefix to RET if LONG_P is true and function return is kept.  */

const char *
ix86_output_function_return (bool long_p)
{
  output_return_instrumentation ();

  /* Function return converted to a jump through a return thunk?  */
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (insn: current_output_insn);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  /* Out-of-line thunk: jump to it and note it must be emitted.  */
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (name: thunk_name, INVALID_REGNUM, need_prefix,
			       ret_p: true);
	  indirect_return_needed |= need_thunk;
	  fprintf (stream: asm_out_file, format: "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc (c: '\n', stream: asm_out_file);
	}
      else
	output_indirect_thunk (INVALID_REGNUM);

      return "";
    }

  /* Plain return; "rep ret" when a two-byte return is wanted, and
     optional int3 hardening against straight-line speculation.  */
  output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
  return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
}
17492
/* Output indirect function return.  RET_OP is the function return
   target.  */

const char *
ix86_output_indirect_function_return (rtx ret_op)
{
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (insn: current_output_insn);
      /* Only %ecx is supported as the indirect return register.  */
      unsigned int regno = REGNO (ret_op);
      gcc_assert (regno == CX_REG);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (name: thunk_name, regno, need_prefix, ret_p: true);

	  /* Note that the %ecx return thunk must be emitted later.  */
	  if (need_thunk)
	    {
	      indirect_return_via_cx = true;
	      SET_HARD_REG_BIT (set&: indirect_thunks_used, CX_REG);
	    }
	  fprintf (stream: asm_out_file, format: "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc (c: '\n', stream: asm_out_file);
	}
      else
	output_indirect_thunk (regno);
    }
  else
    {
      /* Plain indirect jump, with optional SLS hardening.  */
      output_asm_insn ("%!jmp\t%A0", &ret_op);
      if (ix86_harden_sls & harden_sls_indirect_jmp)
	fputs (s: "\tint3\n", stream: asm_out_file);
    }
  return "";
}
17534
/* Output the assembly for a call instruction.  INSN is the call insn,
   CALL_OP its target (symbol, register or memory).  Handles sibcalls,
   non-PIC noplt GOT calls, indirect-branch thunks, SLS hardening and
   the SEH nop requirement; returns any trailing insn text.  */

const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  /* Whether indirect calls must be routed through a thunk.  */
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  /* A direct call to a "noplt" symbol becomes an indirect
	     jump through the GOT slot.  */
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
		}
	    }
	  else
	    xasm = "%!jmp\t%P0";
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0";
      else
	{
	  if (output_indirect_p)
	    xasm = "%0";
	  else
	    xasm = "%!jmp\t%A0";
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, sibcall_p: true);
      else
	{
	  output_asm_insn (xasm, &call_op);
	  /* SLS hardening: trap after a non-thunk indirect jump.  */
	  if (!direct_p
	      && (ix86_harden_sls & harden_sls_indirect_jmp))
	    return "int3";
	}
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (insn: i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    {
      /* Same noplt GOT redirection as in the sibcall case above,
	 but emitting a call rather than a jump.  */
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
	    }
	}
      else
	xasm = "%!call\t%P0";
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0";
      else
	xasm = "%!call\t%A0";
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, sibcall_p: false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
17678
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Reuse an already-allocated slot with the same mode and number.  */
  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  int align = 0;
  /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
     alignment with -m32 -mpreferred-stack-boundary=2.  */
  if (mode == DImode
      && !TARGET_64BIT
      && n == SLOT_FLOATxFDI_387
      && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
    align = 32;
  /* Allocate a fresh slot and push it onto the per-function list
     (GC-allocated, so it lives as long as the function RTL).  */
  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}
17713
17714static void
17715ix86_instantiate_decls (void)
17716{
17717 struct stack_local_entry *s;
17718
17719 for (s = ix86_stack_locals; s; s = s->next)
17720 if (s->rtl != NULL_RTX)
17721 instantiate_decl_rtl (x: s->rtl);
17722}
17723
17724/* Check whether x86 address PARTS is a pc-relative address. */
17725
17726bool
17727ix86_rip_relative_addr_p (struct ix86_address *parts)
17728{
17729 rtx base, index, disp;
17730
17731 base = parts->base;
17732 index = parts->index;
17733 disp = parts->disp;
17734
17735 if (disp && !base && !index)
17736 {
17737 if (TARGET_64BIT)
17738 {
17739 rtx symbol = disp;
17740
17741 if (GET_CODE (disp) == CONST)
17742 symbol = XEXP (disp, 0);
17743 if (GET_CODE (symbol) == PLUS
17744 && CONST_INT_P (XEXP (symbol, 1)))
17745 symbol = XEXP (symbol, 0);
17746
17747 if (LABEL_REF_P (symbol)
17748 || (SYMBOL_REF_P (symbol)
17749 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17750 || (GET_CODE (symbol) == UNSPEC
17751 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17752 || XINT (symbol, 1) == UNSPEC_PCREL
17753 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17754 return true;
17755 }
17756 }
17757 return false;
17758}
17759
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-increment/decrement addressing encodes no explicit address
     bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, out: &parts);
  gcc_assert (ok);

  /* A non-default segment needs a one-byte segment override prefix.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Look through SUBREGs to the underlying registers.  */
  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      /* 4 bytes of displacement, plus a SIB byte when the address is
	 not RIP-relative.  */
      len += 4;
      if (!ix86_rip_relative_addr_p (parts: &parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* A displacement fitting the 'K' constraint (signed 8-bit)
	     takes one byte; otherwise a full 32-bit displacement.  */
	  if (base && satisfies_constraint_K (op: disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
17859
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  Returns the
   byte length of the insn's immediate operand (0 if it has none).  */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	/* Only a single immediate operand per insn is expected.  */
	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		/* NOTE: this "continue" resumes the operand loop, not
		   the switch.  */
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    /* Values fitting a sign-extended byte use the short form.  */
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
17919
/* Compute default value for "length_address" attribute: the number of
   bytes the insn's memory address contributes to its encoding.  */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  /* For LEA the "address" is the SET_SRC of the pattern itself.  */
  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, lea: true);
    }

  /* Otherwise find the first MEM operand and measure its address.  */
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      /* Advance to the constraint string of the matched
		 alternative; alternatives are comma-separated.  */
	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), lea: false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}
17973
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  HAS_0F_OPCODE is true
   when the insn uses a 0f-escaped opcode; HAS_VEX_W when it sets
   VEX.W.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  int i, reg_only = 2 + 1;
  bool has_mem = false;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;

	/* REX.B bit requires 3-byte VEX.  Right here we don't know which
	   operand will be encoded using VEX.B, so be conservative.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (REX_INT_REGNO_P (recog_data.operand[i])
	    || REX2_INT_REGNO_P (recog_data.operand[i])
	    || REX_SSE_REGNO_P (recog_data.operand[i]))
	  reg_only = 3 + 1;
      }
    else if (MEM_P (recog_data.operand[i]))
      {
	/* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
	if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
	  return 4;

	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;

	has_mem = true;
      }

  /* A memory operand with no extended registers can use the 2-byte
     prefix; register-only insns use the conservative value computed
     above.  */
  return has_mem ? 2 + 1 : reg_only;
}
18027
18028
/* Forward declaration; the definition appears later in this file.
   Needed by the pre-reload scheduling helpers below.  */
static bool
ix86_class_likely_spilled_p (reg_class_t);
18031
/* Returns true if lhs of insn is HW function argument register and set up
   is_spilled to true if it is likely spilled HW register.  Note that
   *IS_SPILLED is only ever set, never cleared; the caller must
   initialize it to false.  */
static bool
insn_is_function_arg (rtx insn, bool* is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore it.  */
  if (CALL_P (insn))
    return false;
  /* Look at the (first element of a PARALLEL) pattern.  */
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
	*is_spilled = true;
      return true;
    }
  return false;
}
18061
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.  CALL is the
   call insn; HEAD is the first insn of the scheduling region.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (insn: head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (insn: last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (insn: last, is_spilled: &is_spilled))
	break;
      /* The insn directly before the call is not an argument move:
	 nothing to chain.  */
      return NULL;
    }

  first_arg = last;
  /* Walk backwards over the contiguous run of argument set-up insns,
     adding output dependencies between consecutive arguments.  */
  while (true)
    {
      insn = PREV_INSN (insn: last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, is_spilled: &is_spilled))
	{
	  /* Add output dependence between two function arguments if chain
	     of output arguments contains likely spilled HW registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT)
;
	  first_arg = last = insn;
	}
      else
	break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
18116
18117/* Add output or anti dependency from insn to first_arg to restrict its code
18118 motion. */
18119static void
18120avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
18121{
18122 rtx set;
18123 rtx tmp;
18124
18125 set = single_set (insn);
18126 if (!set)
18127 return;
18128 tmp = SET_DEST (set);
18129 if (REG_P (tmp))
18130 {
18131 /* Add output dependency to the first function argument. */
18132 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
18133 return;
18134 }
18135 /* Add anti dependency. */
18136 add_dependence (first_arg, insn, REG_DEP_ANTI);
18137}
18138
18139/* Avoid cross block motion of function argument through adding dependency
18140 from the first non-jump instruction in bb. */
18141static void
18142add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
18143{
18144 rtx_insn *insn = BB_END (bb);
18145
18146 while (insn)
18147 {
18148 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
18149 {
18150 rtx set = single_set (insn);
18151 if (set)
18152 {
18153 avoid_func_arg_motion (first_arg: arg, insn);
18154 return;
18155 }
18156 }
18157 if (insn == BB_HEAD (bb))
18158 return;
18159 insn = PREV_INSN (insn);
18160 }
18161}
18162
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  HEAD/TAIL delimit the insn
   range of the current scheduling region.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  if (reload_completed)
    return;
  /* Skip leading debug insns so HEAD is a real boundary insn.  */
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (insn: head);
  /* Walk the region backwards looking for call insns.  */
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (call: insn, head);
	if (first_arg)
	  {
	    /* Add dependee for first argument to predecessors if only
	       region contains more than one block.  */
	    basic_block bb =  BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating of loop-carried dependencies through
		       using topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (arg: first_arg, bb: e->src);
		  }
	      }
	    /* Resume the scan at the first argument insn.  */
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
18213
18214/* Hook for pre-reload schedule - set priority of moves from likely spilled
18215 HW registers to maximum, to schedule them at soon as possible. These are
18216 moves from function argument registers at the top of the function entry
18217 and moves from function return value registers after call. */
18218static int
18219ix86_adjust_priority (rtx_insn *insn, int priority)
18220{
18221 rtx set;
18222
18223 if (reload_completed)
18224 return priority;
18225
18226 if (!NONDEBUG_INSN_P (insn))
18227 return priority;
18228
18229 set = single_set (insn);
18230 if (set)
18231 {
18232 rtx tmp = SET_SRC (set);
18233 if (REG_P (tmp)
18234 && HARD_REGISTER_P (tmp)
18235 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
18236 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
18237 return current_sched_info->sched_max_insns_priority;
18238 }
18239
18240 return priority;
18241}
18242
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_TREMONT:
    case PROCESSOR_ALDERLAKE:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for pre-reload schedule
	 to save compile time.  */
      if (reload_completed)
	{
	  ix86_core2i7_init_hooks ();
	  break;
	}
      /* Fall through.  */
    default:
      /* Clear the multipass hooks so the generic scheduler code paths
	 are used.  Note the tuned CPUs above also land here for the
	 pre-reload (!reload_completed) schedule.  */
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
18278
18279
18280/* Implement TARGET_STATIC_RTX_ALIGNMENT. */
18281
18282static HOST_WIDE_INT
18283ix86_static_rtx_alignment (machine_mode mode)
18284{
18285 if (mode == DFmode)
18286 return 64;
18287 if (ALIGN_MODE_128 (mode))
18288 return MAX (128, GET_MODE_ALIGNMENT (mode));
18289 return GET_MODE_ALIGNMENT (mode);
18290}
18291
18292/* Implement TARGET_CONSTANT_ALIGNMENT. */
18293
18294static HOST_WIDE_INT
18295ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
18296{
18297 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18298 || TREE_CODE (exp) == INTEGER_CST)
18299 {
18300 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
18301 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
18302 return MAX (mode_align, align);
18303 }
18304 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18305 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18306 return BITS_PER_WORD;
18307
18308 return align;
18309}
18310
18311/* Implement TARGET_EMPTY_RECORD_P. */
18312
18313static bool
18314ix86_is_empty_record (const_tree type)
18315{
18316 if (!TARGET_64BIT)
18317 return false;
18318 return default_is_empty_record (type);
18319}
18320
/* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  Warn (once per CUM)
   about the GCC 8 / -fabi-version=12 change in how empty classes are
   passed.  */

static void
ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v);

  /* warn_empty acts as a one-shot latch; it is cleared below after
     the first warning.  */
  if (!cum->warn_empty)
    return;

  if (!TYPE_EMPTY_P (type))
    return;

  /* Don't warn if the function isn't visible outside of the TU.  */
  if (cum->decl && !TREE_PUBLIC (cum->decl))
    return;

  tree decl = cum->decl;
  if (!decl)
    /* If we don't know the target, look at the current TU.  */
    decl = current_function_decl;

  const_tree ctx = get_ultimate_context (decl);
  if (ctx == NULL_TREE
      || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
    return;

  /* If the actual size of the type is zero, then there is no change
     in how objects of this size are passed.  */
  if (int_size_in_bytes (type) == 0)
    return;

  warning (OPT_Wabi, "empty class %qT parameter passing ABI "
	   "changes in %<-fabi-version=12%> (GCC 8)", type);

  /* Only warn once.  */
  cum->warn_empty = false;
}
18359
18360/* This hook returns name of multilib ABI. */
18361
18362static const char *
18363ix86_get_multilib_abi_name (void)
18364{
18365 if (!(TARGET_64BIT_P (ix86_isa_flags)))
18366 return "i386";
18367 else if (TARGET_X32_P (ix86_isa_flags))
18368 return "x32";
18369 else
18370 return "x86_64";
18371}
18372
18373/* Compute the alignment for a variable for Intel MCU psABI. TYPE is
18374 the data type, and ALIGN is the alignment that the object would
18375 ordinarily have. */
18376
18377static int
18378iamcu_alignment (tree type, int align)
18379{
18380 machine_mode mode;
18381
18382 if (align < 32 || TYPE_USER_ALIGN (type))
18383 return align;
18384
18385 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
18386 bytes. */
18387 type = strip_array_types (type);
18388 if (TYPE_ATOMIC (type))
18389 return align;
18390
18391 mode = TYPE_MODE (type);
18392 switch (GET_MODE_CLASS (mode))
18393 {
18394 case MODE_INT:
18395 case MODE_COMPLEX_INT:
18396 case MODE_COMPLEX_FLOAT:
18397 case MODE_FLOAT:
18398 case MODE_DECIMAL_FLOAT:
18399 return 32;
18400 default:
18401 return align;
18402 }
18403}
18404
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  OPT is false when
   only ABI-mandated (non-optimization) alignment may be applied.  */

int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  /* -malign-data= overrides the defaults computed above.  */
  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  /* Raise alignment of large aggregates up to the compat and
     cache-line maxima (optimization only).  */
  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: 128)
	  && align < 128)
	return 128;
    }

  /* Everything past this point is an optimization.  */
  if (!opt)
    return align;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
18505
18506/* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18507static void
18508ix86_lower_local_decl_alignment (tree decl)
18509{
18510 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18511 DECL_ALIGN (decl), true);
18512 if (new_align < DECL_ALIGN (decl))
18513 SET_DECL_ALIGN (decl, new_align);
18514}
18515
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  When MAY_LOWER is true the returned alignment may be lower
   than ALIGN (see the DImode case below).  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align, bool may_lower)
{
  tree type, decl;

  /* EXP may be either a decl or a bare type.  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: 128)
	  && align < 128)
	return 128;
    }
  /* Per-type-kind minimum alignments, mirroring ix86_data_alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
18629
18630/* Compute the minimum required alignment for dynamic stack realignment
18631 purposes for a local variable, parameter or a stack slot. EXP is
18632 the data type or decl itself, MODE is its mode and ALIGN is the
18633 alignment that the object would ordinarily have. */
18634
18635unsigned int
18636ix86_minimum_alignment (tree exp, machine_mode mode,
18637 unsigned int align)
18638{
18639 tree type, decl;
18640
18641 if (exp && DECL_P (exp))
18642 {
18643 type = TREE_TYPE (exp);
18644 decl = exp;
18645 }
18646 else
18647 {
18648 type = exp;
18649 decl = NULL;
18650 }
18651
18652 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18653 return align;
18654
18655 /* Don't do dynamic stack realignment for long long objects with
18656 -mpreferred-stack-boundary=2. */
18657 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18658 && (!type || (!TYPE_USER_ALIGN (type)
18659 && !TYPE_ATOMIC (strip_array_types (type))))
18660 && (!decl || !DECL_USER_ALIGN (decl)))
18661 {
18662 gcc_checking_assert (!TARGET_STV);
18663 return 32;
18664 }
18665
18666 return align;
18667}
18668
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.
   FNDECL_OR_TYPE identifies the nested function (decl or type);
   INCOMING_P is true when computing the location seen by the callee.  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      /* Separate the decl from its type; either may have been passed.  */
      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      ccvt = ix86_get_callcvt (type: fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (type: fntype, decl: fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      /* Record (once, before reload) that this function receives
		 its static chain on the stack.  */
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
18745
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.

   OFFSET tracks how many bytes of the trampoline template have been
   written so far; the final assertion checks we stayed within
   TRAMPOLINE_SIZE.  All instruction bytes are emitted as little-endian
   integer stores of the raw x86 opcode encodings.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;
  /* With -fcf-protection=branch the trampoline is an indirect-branch
     target, so it must begin with an ENDBR marker.  */
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
	{
	  /* Insert ENDBR64 (f3 0f 1e fa), stored little-endian.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
	  offset += 4;
	}

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  /* 41 bb = movl $imm32, %r11d (zero-extends into r11);
	     6 bytes total.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb = movabs $imm64, %r11; 10 bytes total.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  /* 41 ba = movl $imm32, %r10d.  */
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  /* 49 ba = movabs $imm64, %r10.  */
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.
	 49 ff e3 = jmp *%r11, 90 = nop.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl_or_type: fndecl, incoming_p: true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      /* b8 = movl $imm32, %eax.  */
	      opcode = 0xb8; break;
	    case CX_REG:
	      /* b9 = movl $imm32, %ecx.  */
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	/* 68 = pushl $imm32 (static chain goes on the stack).  */
	opcode = 0x68;

      if (need_endbr)
	{
	  /* Insert ENDBR32 (f3 0f 1e fb), stored little-endian.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
	  offset += 4;
	}

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      /* e9 = jmp rel32; the displacement is filled in below.  */
      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
	  && !cgraph_node::get (decl: fndecl)->only_called_directly_p ())
	skip += 4;
      /* rel32 = target entry (plus SKIP) minus the address of the byte
	 after the jmp, i.e. trampoline base + OFFSET.  */
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - skip),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
18897
18898static bool
18899ix86_allocate_stack_slots_for_args (void)
18900{
18901 /* Naked functions should not allocate stack slots for arguments. */
18902 return !ix86_function_naked (fn: current_function_decl);
18903}
18904
18905static bool
18906ix86_warn_func_return (tree decl)
18907{
18908 /* Naked functions are implemented entirely in assembly, including the
18909 return sequence, so suppress warnings about this. */
18910 return !ix86_function_naked (fn: decl);
18911}
18912
18913/* Return the shift count of a vector by scalar shift builtin second argument
18914 ARG1. */
18915static tree
18916ix86_vector_shift_count (tree arg1)
18917{
18918 if (tree_fits_uhwi_p (arg1))
18919 return arg1;
18920 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
18921 {
18922 /* The count argument is weird, passed in as various 128-bit
18923 (or 64-bit) vectors, the low 64 bits from it are the count. */
18924 unsigned char buf[16];
18925 int len = native_encode_expr (arg1, buf, 16);
18926 if (len == 0)
18927 return NULL_TREE;
18928 tree t = native_interpret_expr (uint64_type_node, buf, len);
18929 if (t && tree_fits_uhwi_p (t))
18930 return t;
18931 }
18932 return NULL_TREE;
18933}
18934
18935/* Return true if arg_mask is all ones, ELEMS is elements number of
18936 corresponding vector. */
18937static bool
18938ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
18939{
18940 if (TREE_CODE (arg_mask) != INTEGER_CST)
18941 return false;
18942
18943 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
18944 if (elems == HOST_BITS_PER_WIDE_INT)
18945 return mask == HOST_WIDE_INT_M1U;
18946 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18947 return false;
18948
18949 return true;
18950}
18951
/* Implement TARGET_FOLD_BUILTIN.  Fold a call to an ix86
   machine-dependent builtin FNDECL with N_ARGS arguments ARGS into a
   tree (typically a constant) when the arguments permit it.  Return
   NULL_TREE when no folding is possible.  IGNORE is unused here.  */

static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code
	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl);
      enum rtx_code rcode;
      bool is_vshift;
      enum tree_code tcode;
      bool is_scalar;
      unsigned HOST_WIDE_INT mask;

      switch (fn_code)
	{
	case IX86_BUILTIN_CPU_IS:
	case IX86_BUILTIN_CPU_SUPPORTS:
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);

	/* __float128 NaN builtins: build the REAL_CST directly from
	   the payload string.  */
	case IX86_BUILTIN_NANQ:
	case IX86_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IX86_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }

	case IX86_BUILTIN_INFQ:
	case IX86_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }

	/* TZCNT/LZCNT on a constant: unlike plain CTZ/CLZ, a zero
	   input is well-defined and yields the operand precision.  */
	case IX86_BUILTIN_TZCNT16:
	case IX86_BUILTIN_CTZS:
	case IX86_BUILTIN_TZCNT32:
	case IX86_BUILTIN_TZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_TZCNT16
		  || fn_code == IX86_BUILTIN_CTZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CTZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_LZCNT16:
	case IX86_BUILTIN_CLZS:
	case IX86_BUILTIN_LZCNT32:
	case IX86_BUILTIN_LZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_LZCNT16
		  || fn_code == IX86_BUILTIN_CLZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CLZ, type, arg);
	    }
	  break;

	/* BEXTR: extract LEN bits starting at START, both packed in
	   the low 16 bits of args[1] (bits 8-15 = length,
	   bits 0-7 = start).  */
	case IX86_BUILTIN_BEXTR32:
	case IX86_BUILTIN_BEXTR64:
	case IX86_BUILTIN_BEXTRI32:
	case IX86_BUILTIN_BEXTRI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
	      unsigned int start = tree_to_uhwi (args[1]);
	      unsigned int len = (start & 0xff00) >> 8;
	      tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
	      start &= 0xff;
	      if (start >= prec || len == 0)
		/* Out-of-range start or empty field extracts zero;
		   keep args[0] for its side effects.  */
		return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
					 args[0]);
	      else if (!tree_fits_uhwi_p (args[0]))
		break;
	      else
		res = tree_to_uhwi (args[0]) >> start;
	      if (len > prec)
		len = prec;
	      if (len < HOST_BITS_PER_WIDE_INT)
		res &= (HOST_WIDE_INT_1U << len) - 1;
	      return build_int_cstu (type: lhs_type, res);
	    }
	  break;

	/* BZHI: zero the bits of args[0] from index args[1] upward.  */
	case IX86_BUILTIN_BZHI32:
	case IX86_BUILTIN_BZHI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
	      tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
		return args[0];
	      if (idx == 0)
		return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
					 args[0]);
	      if (!tree_fits_uhwi_p (args[0]))
		break;
	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
	      res &= ~(HOST_WIDE_INT_M1U << idx);
	      return build_int_cstu (type: lhs_type, res);
	    }
	  break;

	/* PDEP: deposit successive low bits of SRC into the positions
	   of the set bits of MASK.  */
	case IX86_BUILTIN_PDEP32:
	case IX86_BUILTIN_PDEP64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & k) != 0)
		      res |= m;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	/* PEXT: gather the bits of SRC selected by MASK into the low
	   bits of the result.  */
	case IX86_BUILTIN_PEXT32:
	case IX86_BUILTIN_PEXT64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & m) != 0)
		      res |= k;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	/* MOVMSK/PMOVMSKB on a constant vector: bit I of the result is
	   the sign bit of element I.  */
	case IX86_BUILTIN_MOVMSKPS:
	case IX86_BUILTIN_PMOVMSKB:
	case IX86_BUILTIN_MOVMSKPD:
	case IX86_BUILTIN_PMOVMSKB128:
	case IX86_BUILTIN_MOVMSKPD256:
	case IX86_BUILTIN_MOVMSKPS256:
	case IX86_BUILTIN_PMOVMSKB256:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == VECTOR_CST)
	    {
	      HOST_WIDE_INT res = 0;
	      for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
		{
		  tree e = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
		    {
		      if (wi::neg_p (x: wi::to_wide (t: e)))
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
		    {
		      if (TREE_REAL_CST (e).sign)
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else
		    return NULL_TREE;
		}
	      return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PSLLD:
	case IX86_BUILTIN_PSLLD128:
	case IX86_BUILTIN_PSLLD128_MASK:
	case IX86_BUILTIN_PSLLD256:
	case IX86_BUILTIN_PSLLD256_MASK:
	case IX86_BUILTIN_PSLLD512:
	case IX86_BUILTIN_PSLLDI:
	case IX86_BUILTIN_PSLLDI128:
	case IX86_BUILTIN_PSLLDI128_MASK:
	case IX86_BUILTIN_PSLLDI256:
	case IX86_BUILTIN_PSLLDI256_MASK:
	case IX86_BUILTIN_PSLLDI512:
	case IX86_BUILTIN_PSLLQ:
	case IX86_BUILTIN_PSLLQ128:
	case IX86_BUILTIN_PSLLQ128_MASK:
	case IX86_BUILTIN_PSLLQ256:
	case IX86_BUILTIN_PSLLQ256_MASK:
	case IX86_BUILTIN_PSLLQ512:
	case IX86_BUILTIN_PSLLQI:
	case IX86_BUILTIN_PSLLQI128:
	case IX86_BUILTIN_PSLLQI128_MASK:
	case IX86_BUILTIN_PSLLQI256:
	case IX86_BUILTIN_PSLLQI256_MASK:
	case IX86_BUILTIN_PSLLQI512:
	case IX86_BUILTIN_PSLLW:
	case IX86_BUILTIN_PSLLW128:
	case IX86_BUILTIN_PSLLW128_MASK:
	case IX86_BUILTIN_PSLLW256:
	case IX86_BUILTIN_PSLLW256_MASK:
	case IX86_BUILTIN_PSLLW512_MASK:
	case IX86_BUILTIN_PSLLWI:
	case IX86_BUILTIN_PSLLWI128:
	case IX86_BUILTIN_PSLLWI128_MASK:
	case IX86_BUILTIN_PSLLWI256:
	case IX86_BUILTIN_PSLLWI256_MASK:
	case IX86_BUILTIN_PSLLWI512_MASK:
	  rcode = ASHIFT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSRAD:
	case IX86_BUILTIN_PSRAD128:
	case IX86_BUILTIN_PSRAD128_MASK:
	case IX86_BUILTIN_PSRAD256:
	case IX86_BUILTIN_PSRAD256_MASK:
	case IX86_BUILTIN_PSRAD512:
	case IX86_BUILTIN_PSRADI:
	case IX86_BUILTIN_PSRADI128:
	case IX86_BUILTIN_PSRADI128_MASK:
	case IX86_BUILTIN_PSRADI256:
	case IX86_BUILTIN_PSRADI256_MASK:
	case IX86_BUILTIN_PSRADI512:
	case IX86_BUILTIN_PSRAQ128_MASK:
	case IX86_BUILTIN_PSRAQ256_MASK:
	case IX86_BUILTIN_PSRAQ512:
	case IX86_BUILTIN_PSRAQI128_MASK:
	case IX86_BUILTIN_PSRAQI256_MASK:
	case IX86_BUILTIN_PSRAQI512:
	case IX86_BUILTIN_PSRAW:
	case IX86_BUILTIN_PSRAW128:
	case IX86_BUILTIN_PSRAW128_MASK:
	case IX86_BUILTIN_PSRAW256:
	case IX86_BUILTIN_PSRAW256_MASK:
	case IX86_BUILTIN_PSRAW512:
	case IX86_BUILTIN_PSRAWI:
	case IX86_BUILTIN_PSRAWI128:
	case IX86_BUILTIN_PSRAWI128_MASK:
	case IX86_BUILTIN_PSRAWI256:
	case IX86_BUILTIN_PSRAWI256_MASK:
	case IX86_BUILTIN_PSRAWI512:
	  rcode = ASHIFTRT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSRLD:
	case IX86_BUILTIN_PSRLD128:
	case IX86_BUILTIN_PSRLD128_MASK:
	case IX86_BUILTIN_PSRLD256:
	case IX86_BUILTIN_PSRLD256_MASK:
	case IX86_BUILTIN_PSRLD512:
	case IX86_BUILTIN_PSRLDI:
	case IX86_BUILTIN_PSRLDI128:
	case IX86_BUILTIN_PSRLDI128_MASK:
	case IX86_BUILTIN_PSRLDI256:
	case IX86_BUILTIN_PSRLDI256_MASK:
	case IX86_BUILTIN_PSRLDI512:
	case IX86_BUILTIN_PSRLQ:
	case IX86_BUILTIN_PSRLQ128:
	case IX86_BUILTIN_PSRLQ128_MASK:
	case IX86_BUILTIN_PSRLQ256:
	case IX86_BUILTIN_PSRLQ256_MASK:
	case IX86_BUILTIN_PSRLQ512:
	case IX86_BUILTIN_PSRLQI:
	case IX86_BUILTIN_PSRLQI128:
	case IX86_BUILTIN_PSRLQI128_MASK:
	case IX86_BUILTIN_PSRLQI256:
	case IX86_BUILTIN_PSRLQI256_MASK:
	case IX86_BUILTIN_PSRLQI512:
	case IX86_BUILTIN_PSRLW:
	case IX86_BUILTIN_PSRLW128:
	case IX86_BUILTIN_PSRLW128_MASK:
	case IX86_BUILTIN_PSRLW256:
	case IX86_BUILTIN_PSRLW256_MASK:
	case IX86_BUILTIN_PSRLW512:
	case IX86_BUILTIN_PSRLWI:
	case IX86_BUILTIN_PSRLWI128:
	case IX86_BUILTIN_PSRLWI128_MASK:
	case IX86_BUILTIN_PSRLWI256:
	case IX86_BUILTIN_PSRLWI256_MASK:
	case IX86_BUILTIN_PSRLWI512:
	  rcode = LSHIFTRT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSLLVV16HI:
	case IX86_BUILTIN_PSLLVV16SI:
	case IX86_BUILTIN_PSLLVV2DI:
	case IX86_BUILTIN_PSLLVV2DI_MASK:
	case IX86_BUILTIN_PSLLVV32HI:
	case IX86_BUILTIN_PSLLVV4DI:
	case IX86_BUILTIN_PSLLVV4DI_MASK:
	case IX86_BUILTIN_PSLLVV4SI:
	case IX86_BUILTIN_PSLLVV4SI_MASK:
	case IX86_BUILTIN_PSLLVV8DI:
	case IX86_BUILTIN_PSLLVV8HI:
	case IX86_BUILTIN_PSLLVV8SI:
	case IX86_BUILTIN_PSLLVV8SI_MASK:
	  rcode = ASHIFT;
	  is_vshift = true;
	  goto do_shift;
	case IX86_BUILTIN_PSRAVQ128:
	case IX86_BUILTIN_PSRAVQ256:
	case IX86_BUILTIN_PSRAVV16HI:
	case IX86_BUILTIN_PSRAVV16SI:
	case IX86_BUILTIN_PSRAVV32HI:
	case IX86_BUILTIN_PSRAVV4SI:
	case IX86_BUILTIN_PSRAVV4SI_MASK:
	case IX86_BUILTIN_PSRAVV8DI:
	case IX86_BUILTIN_PSRAVV8HI:
	case IX86_BUILTIN_PSRAVV8SI:
	case IX86_BUILTIN_PSRAVV8SI_MASK:
	  rcode = ASHIFTRT;
	  is_vshift = true;
	  goto do_shift;
	case IX86_BUILTIN_PSRLVV16HI:
	case IX86_BUILTIN_PSRLVV16SI:
	case IX86_BUILTIN_PSRLVV2DI:
	case IX86_BUILTIN_PSRLVV2DI_MASK:
	case IX86_BUILTIN_PSRLVV32HI:
	case IX86_BUILTIN_PSRLVV4DI:
	case IX86_BUILTIN_PSRLVV4DI_MASK:
	case IX86_BUILTIN_PSRLVV4SI:
	case IX86_BUILTIN_PSRLVV4SI_MASK:
	case IX86_BUILTIN_PSRLVV8DI:
	case IX86_BUILTIN_PSRLVV8HI:
	case IX86_BUILTIN_PSRLVV8SI:
	case IX86_BUILTIN_PSRLVV8SI_MASK:
	  rcode = LSHIFTRT;
	  is_vshift = true;
	  goto do_shift;

	/* Common constant-folding for all vector shifts.  RCODE is the
	   shift direction; IS_VSHIFT means per-element counts
	   (args[1] is a count vector rather than a scalar count).
	   Masked variants carry a writemask in args[n_args - 1] and
	   the merge source in args[n_args - 2].  */
	do_shift:
	  gcc_assert (n_args >= 2);
	  if (TREE_CODE (args[0]) != VECTOR_CST)
	    break;
	  mask = HOST_WIDE_INT_M1U;
	  if (n_args > 2)
	    {
	      /* This is masked shift.  */
	      if (!tree_fits_uhwi_p (args[n_args - 1])
		  || TREE_SIDE_EFFECTS (args[n_args - 2]))
		break;
	      mask = tree_to_uhwi (args[n_args - 1]);
	      unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
	      /* Treat the bits above ELEMS as set so only the
		 meaningful low bits decide anything below.  */
	      mask |= HOST_WIDE_INT_M1U << elems;
	      if (mask != HOST_WIDE_INT_M1U
		  && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
		break;
	      if (mask == (HOST_WIDE_INT_M1U << elems))
		/* All-zero writemask: result is the merge source.  */
		return args[n_args - 2];
	    }
	  if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
	    break;
	  if (tree tem = (is_vshift ? integer_one_node
			  : ix86_vector_shift_count (arg1: args[1])))
	    {
	      unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
	      unsigned HOST_WIDE_INT prec
		= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
	      if (count == 0 && mask == HOST_WIDE_INT_M1U)
		return args[0];
	      if (count >= prec)
		{
		  /* Oversized counts saturate: arithmetic right shift
		     behaves like shifting by PREC - 1, others yield
		     zero.  */
		  if (rcode == ASHIFTRT)
		    count = prec - 1;
		  else if (mask == HOST_WIDE_INT_M1U)
		    return build_zero_cst (TREE_TYPE (args[0]));
		}
	      tree countt = NULL_TREE;
	      if (!is_vshift)
		{
		  if (count >= prec)
		    countt = integer_zero_node;
		  else
		    countt = build_int_cst (integer_type_node, count);
		}
	      tree_vector_builder builder;
	      if (mask != HOST_WIDE_INT_M1U || is_vshift)
		builder.new_vector (TREE_TYPE (args[0]),
				    npatterns: TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
				    nelts_per_pattern: 1);
	      else
		builder.new_unary_operation (TREE_TYPE (args[0]), vec: args[0],
					     allow_stepped_p: false);
	      unsigned int cnt = builder.encoded_nelts ();
	      for (unsigned int i = 0; i < cnt; ++i)
		{
		  tree elt = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
		    return NULL_TREE;
		  tree type = TREE_TYPE (elt);
		  /* Logical right shift must be performed in the
		     unsigned type to avoid sign extension.  */
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (unsigned_type_for (type), elt);
		  if (is_vshift)
		    {
		      countt = VECTOR_CST_ELT (args[1], i);
		      if (TREE_CODE (countt) != INTEGER_CST
			  || TREE_OVERFLOW (countt))
			return NULL_TREE;
		      if (wi::neg_p (x: wi::to_wide (t: countt))
			  || wi::to_widest (t: countt) >= prec)
			{
			  if (rcode == ASHIFTRT)
			    countt = build_int_cst (TREE_TYPE (countt),
						    prec - 1);
			  else
			    {
			      elt = build_zero_cst (TREE_TYPE (elt));
			      countt = build_zero_cst (TREE_TYPE (countt));
			    }
			}
		    }
		  else if (count >= prec)
		    elt = build_zero_cst (TREE_TYPE (elt));
		  elt = const_binop (rcode == ASHIFT
				     ? LSHIFT_EXPR : RSHIFT_EXPR,
				     TREE_TYPE (elt), elt, countt);
		  if (!elt || TREE_CODE (elt) != INTEGER_CST)
		    return NULL_TREE;
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (type, elt);
		  if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
		    {
		      /* Masked-out lane: keep the merge-source
			 element instead of the shifted value.  */
		      elt = VECTOR_CST_ELT (args[n_args - 2], i);
		      if (TREE_CODE (elt) != INTEGER_CST
			  || TREE_OVERFLOW (elt))
			return NULL_TREE;
		    }
		  builder.quick_push (obj: elt);
		}
	      return builder.build ();
	    }
	  break;

	case IX86_BUILTIN_MINSS:
	case IX86_BUILTIN_MINSH_MASK:
	  tcode = LT_EXPR;
	  is_scalar = true;
	  goto do_minmax;

	case IX86_BUILTIN_MAXSS:
	case IX86_BUILTIN_MAXSH_MASK:
	  tcode = GT_EXPR;
	  is_scalar = true;
	  goto do_minmax;

	case IX86_BUILTIN_MINPS:
	case IX86_BUILTIN_MINPD:
	case IX86_BUILTIN_MINPS256:
	case IX86_BUILTIN_MINPD256:
	case IX86_BUILTIN_MINPS512:
	case IX86_BUILTIN_MINPD512:
	case IX86_BUILTIN_MINPS128_MASK:
	case IX86_BUILTIN_MINPD128_MASK:
	case IX86_BUILTIN_MINPS256_MASK:
	case IX86_BUILTIN_MINPD256_MASK:
	case IX86_BUILTIN_MINPH128_MASK:
	case IX86_BUILTIN_MINPH256_MASK:
	case IX86_BUILTIN_MINPH512_MASK:
	  tcode = LT_EXPR;
	  is_scalar = false;
	  goto do_minmax;

	case IX86_BUILTIN_MAXPS:
	case IX86_BUILTIN_MAXPD:
	case IX86_BUILTIN_MAXPS256:
	case IX86_BUILTIN_MAXPD256:
	case IX86_BUILTIN_MAXPS512:
	case IX86_BUILTIN_MAXPD512:
	case IX86_BUILTIN_MAXPS128_MASK:
	case IX86_BUILTIN_MAXPD128_MASK:
	case IX86_BUILTIN_MAXPS256_MASK:
	case IX86_BUILTIN_MAXPD256_MASK:
	case IX86_BUILTIN_MAXPH128_MASK:
	case IX86_BUILTIN_MAXPH256_MASK:
	case IX86_BUILTIN_MAXPH512_MASK:
	  tcode = GT_EXPR;
	  is_scalar = false;
	/* Common constant-folding for MIN/MAX builtins.  TCODE is the
	   element comparison (LT for min, GT for max); IS_SCALAR means
	   only element 0 comes from the operation, the rest from
	   args[0].  Masked variants: args[2] is the merge source,
	   args[3] the writemask, optional args[4] the rounding
	   immediate.  */
	do_minmax:
	  gcc_assert (n_args >= 2);
	  if (TREE_CODE (args[0]) != VECTOR_CST
	      || TREE_CODE (args[1]) != VECTOR_CST)
	    break;
	  mask = HOST_WIDE_INT_M1U;
	  if (n_args > 2)
	    {
	      gcc_assert (n_args >= 4);
	      /* This is masked minmax.  */
	      if (TREE_CODE (args[3]) != INTEGER_CST
		  || TREE_SIDE_EFFECTS (args[2]))
		break;
	      mask = TREE_INT_CST_LOW (args[3]);
	      unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
	      mask |= HOST_WIDE_INT_M1U << elems;
	      if (mask != HOST_WIDE_INT_M1U
		  && TREE_CODE (args[2]) != VECTOR_CST)
		break;
	      if (n_args >= 5)
		{
		  /* Only fold for default rounding (4) or
		     no-exceptions (8) immediates.  */
		  if (!tree_fits_uhwi_p (args[4]))
		    break;
		  if (tree_to_uhwi (args[4]) != 4
		      && tree_to_uhwi (args[4]) != 8)
		    break;
		}
	      if (mask == (HOST_WIDE_INT_M1U << elems))
		return args[2];
	    }
	  /* Punt on NaNs, unless exceptions are disabled.  */
	  if (HONOR_NANS (args[0])
	      && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
	    for (int i = 0; i < 2; ++i)
	      {
		unsigned count = vector_cst_encoded_nelts (t: args[i]);
		for (unsigned j = 0; j < count; ++j)
		  if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
		    return NULL_TREE;
	      }
	  {
	    /* min/max as compare-then-select:
	       res = tcode(a, b) ? a : b per element.  */
	    tree res = const_binop (tcode,
				    truth_type_for (TREE_TYPE (args[0])),
				    args[0], args[1]);
	    if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
	      break;
	    res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
				args[0], args[1]);
	    if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
	      break;
	    if (mask != HOST_WIDE_INT_M1U)
	      {
		/* Apply the writemask via a permutation selecting the
		   computed lane when the mask bit is set, else the
		   merge-source lane.  */
		unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
		vec_perm_builder sel (nelts, nelts, 1);
		for (unsigned int i = 0; i < nelts; i++)
		  if (mask & (HOST_WIDE_INT_1U << i))
		    sel.quick_push (obj: i);
		  else
		    sel.quick_push (obj: nelts + i);
		vec_perm_indices indices (sel, 2, nelts);
		res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
				     indices);
		if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
		  break;
	      }
	    if (is_scalar)
	      {
		/* Scalar form: keep only element 0 of the result;
		   elements 1..n come from args[0].  */
		unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
		vec_perm_builder sel (nelts, nelts, 1);
		sel.quick_push (obj: 0);
		for (unsigned int i = 1; i < nelts; i++)
		  sel.quick_push (obj: nelts + i);
		vec_perm_indices indices (sel, 2, nelts);
		res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
				     indices);
		if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
		  break;
	      }
	    return res;
	  }

	default:
	  break;
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
19549
19550/* Fold a MD builtin (use ix86_fold_builtin for folding into
19551 constant) in GIMPLE. */
19552
19553bool
19554ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19555{
19556 gimple *stmt = gsi_stmt (i: *gsi), *g;
19557 gimple_seq stmts = NULL;
19558 tree fndecl = gimple_call_fndecl (gs: stmt);
19559 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
19560 int n_args = gimple_call_num_args (gs: stmt);
19561 enum ix86_builtins fn_code
19562 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl);
19563 tree decl = NULL_TREE;
19564 tree arg0, arg1, arg2;
19565 enum rtx_code rcode;
19566 enum tree_code tcode;
19567 unsigned HOST_WIDE_INT count;
19568 bool is_vshift;
19569 unsigned HOST_WIDE_INT elems;
19570 location_t loc;
19571
19572 /* Don't fold when there's isa mismatch. */
19573 if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19574 return false;
19575
19576 switch (fn_code)
19577 {
19578 case IX86_BUILTIN_TZCNT32:
19579 decl = builtin_decl_implicit (fncode: BUILT_IN_CTZ);
19580 goto fold_tzcnt_lzcnt;
19581
19582 case IX86_BUILTIN_TZCNT64:
19583 decl = builtin_decl_implicit (fncode: BUILT_IN_CTZLL);
19584 goto fold_tzcnt_lzcnt;
19585
19586 case IX86_BUILTIN_LZCNT32:
19587 decl = builtin_decl_implicit (fncode: BUILT_IN_CLZ);
19588 goto fold_tzcnt_lzcnt;
19589
19590 case IX86_BUILTIN_LZCNT64:
19591 decl = builtin_decl_implicit (fncode: BUILT_IN_CLZLL);
19592 goto fold_tzcnt_lzcnt;
19593
19594 fold_tzcnt_lzcnt:
19595 gcc_assert (n_args == 1);
19596 arg0 = gimple_call_arg (gs: stmt, index: 0);
19597 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (gs: stmt))
19598 {
19599 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
19600 /* If arg0 is provably non-zero, optimize into generic
19601 __builtin_c[tl]z{,ll} function the middle-end handles
19602 better. */
19603 if (!expr_not_equal_to (t: arg0, wi::zero (precision: prec)))
19604 return false;
19605
19606 loc = gimple_location (g: stmt);
19607 g = gimple_build_call (decl, 1, arg0);
19608 gimple_set_location (g, location: loc);
19609 tree lhs = make_ssa_name (integer_type_node);
19610 gimple_call_set_lhs (gs: g, lhs);
19611 gsi_insert_before (gsi, g, GSI_SAME_STMT);
19612 g = gimple_build_assign (gimple_call_lhs (gs: stmt), NOP_EXPR, lhs);
19613 gimple_set_location (g, location: loc);
19614 gsi_replace (gsi, g, false);
19615 return true;
19616 }
19617 break;
19618
19619 case IX86_BUILTIN_BZHI32:
19620 case IX86_BUILTIN_BZHI64:
19621 gcc_assert (n_args == 2);
19622 arg1 = gimple_call_arg (gs: stmt, index: 1);
19623 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (gs: stmt))
19624 {
19625 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19626 arg0 = gimple_call_arg (gs: stmt, index: 0);
19627 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19628 break;
19629 loc = gimple_location (g: stmt);
19630 g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0);
19631 gimple_set_location (g, location: loc);
19632 gsi_replace (gsi, g, false);
19633 return true;
19634 }
19635 break;
19636
19637 case IX86_BUILTIN_PDEP32:
19638 case IX86_BUILTIN_PDEP64:
19639 case IX86_BUILTIN_PEXT32:
19640 case IX86_BUILTIN_PEXT64:
19641 gcc_assert (n_args == 2);
19642 arg1 = gimple_call_arg (gs: stmt, index: 1);
19643 if (integer_all_onesp (arg1) && gimple_call_lhs (gs: stmt))
19644 {
19645 loc = gimple_location (g: stmt);
19646 arg0 = gimple_call_arg (gs: stmt, index: 0);
19647 g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0);
19648 gimple_set_location (g, location: loc);
19649 gsi_replace (gsi, g, false);
19650 return true;
19651 }
19652 break;
19653
19654 case IX86_BUILTIN_PBLENDVB256:
19655 case IX86_BUILTIN_BLENDVPS256:
19656 case IX86_BUILTIN_BLENDVPD256:
19657 /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
19658 to scalar operations and not combined back. */
19659 if (!TARGET_AVX2)
19660 break;
19661
19662 /* FALLTHRU. */
19663 case IX86_BUILTIN_BLENDVPD:
19664 /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
19665 w/o sse4.2, it's veclowered to scalar operations and
19666 not combined back. */
19667 if (!TARGET_SSE4_2)
19668 break;
19669 /* FALLTHRU. */
19670 case IX86_BUILTIN_PBLENDVB128:
19671 case IX86_BUILTIN_BLENDVPS:
19672 gcc_assert (n_args == 3);
19673 arg0 = gimple_call_arg (gs: stmt, index: 0);
19674 arg1 = gimple_call_arg (gs: stmt, index: 1);
19675 arg2 = gimple_call_arg (gs: stmt, index: 2);
19676 if (gimple_call_lhs (gs: stmt))
19677 {
19678 loc = gimple_location (g: stmt);
19679 tree type = TREE_TYPE (arg2);
19680 if (VECTOR_FLOAT_TYPE_P (type))
19681 {
19682 tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19683 ? intSI_type_node : intDI_type_node;
19684 type = get_same_sized_vectype (itype, type);
19685 }
19686 else
19687 type = signed_type_for (type);
19688 arg2 = gimple_build (seq: &stmts, code: VIEW_CONVERT_EXPR, type, ops: arg2);
19689 tree zero_vec = build_zero_cst (type);
19690 tree cmp_type = truth_type_for (type);
19691 tree cmp = gimple_build (seq: &stmts, code: LT_EXPR, type: cmp_type, ops: arg2, ops: zero_vec);
19692 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19693 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
19694 VEC_COND_EXPR, cmp,
19695 arg1, arg0);
19696 gimple_set_location (g, location: loc);
19697 gsi_replace (gsi, g, false);
19698 }
19699 else
19700 gsi_replace (gsi, gimple_build_nop (), false);
19701 return true;
19702
19703
19704 case IX86_BUILTIN_PCMPEQB128:
19705 case IX86_BUILTIN_PCMPEQW128:
19706 case IX86_BUILTIN_PCMPEQD128:
19707 case IX86_BUILTIN_PCMPEQQ:
19708 case IX86_BUILTIN_PCMPEQB256:
19709 case IX86_BUILTIN_PCMPEQW256:
19710 case IX86_BUILTIN_PCMPEQD256:
19711 case IX86_BUILTIN_PCMPEQQ256:
19712 tcode = EQ_EXPR;
19713 goto do_cmp;
19714
19715 case IX86_BUILTIN_PCMPGTB128:
19716 case IX86_BUILTIN_PCMPGTW128:
19717 case IX86_BUILTIN_PCMPGTD128:
19718 case IX86_BUILTIN_PCMPGTQ:
19719 case IX86_BUILTIN_PCMPGTB256:
19720 case IX86_BUILTIN_PCMPGTW256:
19721 case IX86_BUILTIN_PCMPGTD256:
19722 case IX86_BUILTIN_PCMPGTQ256:
19723 tcode = GT_EXPR;
19724
19725 do_cmp:
19726 gcc_assert (n_args == 2);
19727 arg0 = gimple_call_arg (gs: stmt, index: 0);
19728 arg1 = gimple_call_arg (gs: stmt, index: 1);
19729 if (gimple_call_lhs (gs: stmt))
19730 {
19731 loc = gimple_location (g: stmt);
19732 tree type = TREE_TYPE (arg0);
19733 tree zero_vec = build_zero_cst (type);
19734 tree minus_one_vec = build_minus_one_cst (type);
19735 tree cmp_type = truth_type_for (type);
19736 tree cmp = gimple_build (seq: &stmts, code: tcode, type: cmp_type, ops: arg0, ops: arg1);
19737 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19738 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
19739 VEC_COND_EXPR, cmp,
19740 minus_one_vec, zero_vec);
19741 gimple_set_location (g, location: loc);
19742 gsi_replace (gsi, g, false);
19743 }
19744 else
19745 gsi_replace (gsi, gimple_build_nop (), false);
19746 return true;
19747
19748 case IX86_BUILTIN_PSLLD:
19749 case IX86_BUILTIN_PSLLD128:
19750 case IX86_BUILTIN_PSLLD128_MASK:
19751 case IX86_BUILTIN_PSLLD256:
19752 case IX86_BUILTIN_PSLLD256_MASK:
19753 case IX86_BUILTIN_PSLLD512:
19754 case IX86_BUILTIN_PSLLDI:
19755 case IX86_BUILTIN_PSLLDI128:
19756 case IX86_BUILTIN_PSLLDI128_MASK:
19757 case IX86_BUILTIN_PSLLDI256:
19758 case IX86_BUILTIN_PSLLDI256_MASK:
19759 case IX86_BUILTIN_PSLLDI512:
19760 case IX86_BUILTIN_PSLLQ:
19761 case IX86_BUILTIN_PSLLQ128:
19762 case IX86_BUILTIN_PSLLQ128_MASK:
19763 case IX86_BUILTIN_PSLLQ256:
19764 case IX86_BUILTIN_PSLLQ256_MASK:
19765 case IX86_BUILTIN_PSLLQ512:
19766 case IX86_BUILTIN_PSLLQI:
19767 case IX86_BUILTIN_PSLLQI128:
19768 case IX86_BUILTIN_PSLLQI128_MASK:
19769 case IX86_BUILTIN_PSLLQI256:
19770 case IX86_BUILTIN_PSLLQI256_MASK:
19771 case IX86_BUILTIN_PSLLQI512:
19772 case IX86_BUILTIN_PSLLW:
19773 case IX86_BUILTIN_PSLLW128:
19774 case IX86_BUILTIN_PSLLW128_MASK:
19775 case IX86_BUILTIN_PSLLW256:
19776 case IX86_BUILTIN_PSLLW256_MASK:
19777 case IX86_BUILTIN_PSLLW512_MASK:
19778 case IX86_BUILTIN_PSLLWI:
19779 case IX86_BUILTIN_PSLLWI128:
19780 case IX86_BUILTIN_PSLLWI128_MASK:
19781 case IX86_BUILTIN_PSLLWI256:
19782 case IX86_BUILTIN_PSLLWI256_MASK:
19783 case IX86_BUILTIN_PSLLWI512_MASK:
19784 rcode = ASHIFT;
19785 is_vshift = false;
19786 goto do_shift;
19787 case IX86_BUILTIN_PSRAD:
19788 case IX86_BUILTIN_PSRAD128:
19789 case IX86_BUILTIN_PSRAD128_MASK:
19790 case IX86_BUILTIN_PSRAD256:
19791 case IX86_BUILTIN_PSRAD256_MASK:
19792 case IX86_BUILTIN_PSRAD512:
19793 case IX86_BUILTIN_PSRADI:
19794 case IX86_BUILTIN_PSRADI128:
19795 case IX86_BUILTIN_PSRADI128_MASK:
19796 case IX86_BUILTIN_PSRADI256:
19797 case IX86_BUILTIN_PSRADI256_MASK:
19798 case IX86_BUILTIN_PSRADI512:
19799 case IX86_BUILTIN_PSRAQ128_MASK:
19800 case IX86_BUILTIN_PSRAQ256_MASK:
19801 case IX86_BUILTIN_PSRAQ512:
19802 case IX86_BUILTIN_PSRAQI128_MASK:
19803 case IX86_BUILTIN_PSRAQI256_MASK:
19804 case IX86_BUILTIN_PSRAQI512:
19805 case IX86_BUILTIN_PSRAW:
19806 case IX86_BUILTIN_PSRAW128:
19807 case IX86_BUILTIN_PSRAW128_MASK:
19808 case IX86_BUILTIN_PSRAW256:
19809 case IX86_BUILTIN_PSRAW256_MASK:
19810 case IX86_BUILTIN_PSRAW512:
19811 case IX86_BUILTIN_PSRAWI:
19812 case IX86_BUILTIN_PSRAWI128:
19813 case IX86_BUILTIN_PSRAWI128_MASK:
19814 case IX86_BUILTIN_PSRAWI256:
19815 case IX86_BUILTIN_PSRAWI256_MASK:
19816 case IX86_BUILTIN_PSRAWI512:
19817 rcode = ASHIFTRT;
19818 is_vshift = false;
19819 goto do_shift;
19820 case IX86_BUILTIN_PSRLD:
19821 case IX86_BUILTIN_PSRLD128:
19822 case IX86_BUILTIN_PSRLD128_MASK:
19823 case IX86_BUILTIN_PSRLD256:
19824 case IX86_BUILTIN_PSRLD256_MASK:
19825 case IX86_BUILTIN_PSRLD512:
19826 case IX86_BUILTIN_PSRLDI:
19827 case IX86_BUILTIN_PSRLDI128:
19828 case IX86_BUILTIN_PSRLDI128_MASK:
19829 case IX86_BUILTIN_PSRLDI256:
19830 case IX86_BUILTIN_PSRLDI256_MASK:
19831 case IX86_BUILTIN_PSRLDI512:
19832 case IX86_BUILTIN_PSRLQ:
19833 case IX86_BUILTIN_PSRLQ128:
19834 case IX86_BUILTIN_PSRLQ128_MASK:
19835 case IX86_BUILTIN_PSRLQ256:
19836 case IX86_BUILTIN_PSRLQ256_MASK:
19837 case IX86_BUILTIN_PSRLQ512:
19838 case IX86_BUILTIN_PSRLQI:
19839 case IX86_BUILTIN_PSRLQI128:
19840 case IX86_BUILTIN_PSRLQI128_MASK:
19841 case IX86_BUILTIN_PSRLQI256:
19842 case IX86_BUILTIN_PSRLQI256_MASK:
19843 case IX86_BUILTIN_PSRLQI512:
19844 case IX86_BUILTIN_PSRLW:
19845 case IX86_BUILTIN_PSRLW128:
19846 case IX86_BUILTIN_PSRLW128_MASK:
19847 case IX86_BUILTIN_PSRLW256:
19848 case IX86_BUILTIN_PSRLW256_MASK:
19849 case IX86_BUILTIN_PSRLW512:
19850 case IX86_BUILTIN_PSRLWI:
19851 case IX86_BUILTIN_PSRLWI128:
19852 case IX86_BUILTIN_PSRLWI128_MASK:
19853 case IX86_BUILTIN_PSRLWI256:
19854 case IX86_BUILTIN_PSRLWI256_MASK:
19855 case IX86_BUILTIN_PSRLWI512:
19856 rcode = LSHIFTRT;
19857 is_vshift = false;
19858 goto do_shift;
19859 case IX86_BUILTIN_PSLLVV16HI:
19860 case IX86_BUILTIN_PSLLVV16SI:
19861 case IX86_BUILTIN_PSLLVV2DI:
19862 case IX86_BUILTIN_PSLLVV2DI_MASK:
19863 case IX86_BUILTIN_PSLLVV32HI:
19864 case IX86_BUILTIN_PSLLVV4DI:
19865 case IX86_BUILTIN_PSLLVV4DI_MASK:
19866 case IX86_BUILTIN_PSLLVV4SI:
19867 case IX86_BUILTIN_PSLLVV4SI_MASK:
19868 case IX86_BUILTIN_PSLLVV8DI:
19869 case IX86_BUILTIN_PSLLVV8HI:
19870 case IX86_BUILTIN_PSLLVV8SI:
19871 case IX86_BUILTIN_PSLLVV8SI_MASK:
19872 rcode = ASHIFT;
19873 is_vshift = true;
19874 goto do_shift;
19875 case IX86_BUILTIN_PSRAVQ128:
19876 case IX86_BUILTIN_PSRAVQ256:
19877 case IX86_BUILTIN_PSRAVV16HI:
19878 case IX86_BUILTIN_PSRAVV16SI:
19879 case IX86_BUILTIN_PSRAVV32HI:
19880 case IX86_BUILTIN_PSRAVV4SI:
19881 case IX86_BUILTIN_PSRAVV4SI_MASK:
19882 case IX86_BUILTIN_PSRAVV8DI:
19883 case IX86_BUILTIN_PSRAVV8HI:
19884 case IX86_BUILTIN_PSRAVV8SI:
19885 case IX86_BUILTIN_PSRAVV8SI_MASK:
19886 rcode = ASHIFTRT;
19887 is_vshift = true;
19888 goto do_shift;
19889 case IX86_BUILTIN_PSRLVV16HI:
19890 case IX86_BUILTIN_PSRLVV16SI:
19891 case IX86_BUILTIN_PSRLVV2DI:
19892 case IX86_BUILTIN_PSRLVV2DI_MASK:
19893 case IX86_BUILTIN_PSRLVV32HI:
19894 case IX86_BUILTIN_PSRLVV4DI:
19895 case IX86_BUILTIN_PSRLVV4DI_MASK:
19896 case IX86_BUILTIN_PSRLVV4SI:
19897 case IX86_BUILTIN_PSRLVV4SI_MASK:
19898 case IX86_BUILTIN_PSRLVV8DI:
19899 case IX86_BUILTIN_PSRLVV8HI:
19900 case IX86_BUILTIN_PSRLVV8SI:
19901 case IX86_BUILTIN_PSRLVV8SI_MASK:
19902 rcode = LSHIFTRT;
19903 is_vshift = true;
19904 goto do_shift;
19905
19906 do_shift:
19907 gcc_assert (n_args >= 2);
19908 if (!gimple_call_lhs (gs: stmt))
19909 {
19910 gsi_replace (gsi, gimple_build_nop (), false);
19911 return true;
19912 }
19913 arg0 = gimple_call_arg (gs: stmt, index: 0);
19914 arg1 = gimple_call_arg (gs: stmt, index: 1);
19915 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19916 /* For masked shift, only optimize if the mask is all ones. */
19917 if (n_args > 2
19918 && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1)))
19919 break;
19920 if (is_vshift)
19921 {
19922 if (TREE_CODE (arg1) != VECTOR_CST)
19923 break;
19924 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
19925 if (integer_zerop (arg1))
19926 count = 0;
19927 else if (rcode == ASHIFTRT)
19928 break;
19929 else
19930 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
19931 {
19932 tree elt = VECTOR_CST_ELT (arg1, i);
19933 if (!wi::neg_p (x: wi::to_wide (t: elt))
19934 && wi::to_widest (t: elt) < count)
19935 return false;
19936 }
19937 }
19938 else
19939 {
19940 arg1 = ix86_vector_shift_count (arg1);
19941 if (!arg1)
19942 break;
19943 count = tree_to_uhwi (arg1);
19944 }
19945 if (count == 0)
19946 {
19947 /* Just return the first argument for shift by 0. */
19948 loc = gimple_location (g: stmt);
19949 g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0);
19950 gimple_set_location (g, location: loc);
19951 gsi_replace (gsi, g, false);
19952 return true;
19953 }
19954 if (rcode != ASHIFTRT
19955 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
19956 {
19957 /* For shift counts equal or greater than precision, except for
19958 arithmetic right shift the result is zero. */
19959 loc = gimple_location (g: stmt);
19960 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
19961 build_zero_cst (TREE_TYPE (arg0)));
19962 gimple_set_location (g, location: loc);
19963 gsi_replace (gsi, g, false);
19964 return true;
19965 }
19966 break;
19967
19968 case IX86_BUILTIN_SHUFPD512:
19969 case IX86_BUILTIN_SHUFPS512:
19970 case IX86_BUILTIN_SHUFPD:
19971 case IX86_BUILTIN_SHUFPD256:
19972 case IX86_BUILTIN_SHUFPS:
19973 case IX86_BUILTIN_SHUFPS256:
19974 arg0 = gimple_call_arg (gs: stmt, index: 0);
19975 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19976 /* This is masked shuffle. Only optimize if the mask is all ones. */
19977 if (n_args > 3
19978 && !ix86_masked_all_ones (elems,
19979 arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1)))
19980 break;
19981 arg2 = gimple_call_arg (gs: stmt, index: 2);
19982 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (gs: stmt))
19983 {
19984 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
19985 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
19986 if (shuffle_mask > 255)
19987 return false;
19988
19989 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
19990 loc = gimple_location (g: stmt);
19991 tree itype = (imode == E_DFmode
19992 ? long_long_integer_type_node : integer_type_node);
19993 tree vtype = build_vector_type (itype, elems);
19994 tree_vector_builder elts (vtype, elems, 1);
19995
19996
19997 /* Transform integer shuffle_mask to vector perm_mask which
19998 is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */
19999 for (unsigned i = 0; i != elems; i++)
20000 {
20001 unsigned sel_idx;
20002 /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
20003 provide 2 select constrols for each element of the
20004 destination. */
20005 if (imode == E_DFmode)
20006 sel_idx = (i & 1) * elems + (i & ~1)
20007 + ((shuffle_mask >> i) & 1);
20008 else
20009 {
20010 /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
20011 controls for each element of the destination. */
20012 unsigned j = i % 4;
20013 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
20014 + ((shuffle_mask >> 2 * j) & 3);
20015 }
20016 elts.quick_push (obj: build_int_cst (itype, sel_idx));
20017 }
20018
20019 tree perm_mask = elts.build ();
20020 arg1 = gimple_call_arg (gs: stmt, index: 1);
20021 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
20022 VEC_PERM_EXPR,
20023 arg0, arg1, perm_mask);
20024 gimple_set_location (g, location: loc);
20025 gsi_replace (gsi, g, false);
20026 return true;
20027 }
20028 // Do not error yet, the constant could be propagated later?
20029 break;
20030
20031 case IX86_BUILTIN_PABSB:
20032 case IX86_BUILTIN_PABSW:
20033 case IX86_BUILTIN_PABSD:
20034 /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
20035 if (!TARGET_MMX_WITH_SSE)
20036 break;
20037 /* FALLTHRU. */
20038 case IX86_BUILTIN_PABSB128:
20039 case IX86_BUILTIN_PABSB256:
20040 case IX86_BUILTIN_PABSB512:
20041 case IX86_BUILTIN_PABSW128:
20042 case IX86_BUILTIN_PABSW256:
20043 case IX86_BUILTIN_PABSW512:
20044 case IX86_BUILTIN_PABSD128:
20045 case IX86_BUILTIN_PABSD256:
20046 case IX86_BUILTIN_PABSD512:
20047 case IX86_BUILTIN_PABSQ128:
20048 case IX86_BUILTIN_PABSQ256:
20049 case IX86_BUILTIN_PABSQ512:
20050 case IX86_BUILTIN_PABSB128_MASK:
20051 case IX86_BUILTIN_PABSB256_MASK:
20052 case IX86_BUILTIN_PABSW128_MASK:
20053 case IX86_BUILTIN_PABSW256_MASK:
20054 case IX86_BUILTIN_PABSD128_MASK:
20055 case IX86_BUILTIN_PABSD256_MASK:
20056 gcc_assert (n_args >= 1);
20057 if (!gimple_call_lhs (gs: stmt))
20058 {
20059 gsi_replace (gsi, gimple_build_nop (), false);
20060 return true;
20061 }
20062 arg0 = gimple_call_arg (gs: stmt, index: 0);
20063 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20064 /* For masked ABS, only optimize if the mask is all ones. */
20065 if (n_args > 1
20066 && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1)))
20067 break;
20068 {
20069 tree utype, ures, vce;
20070 utype = unsigned_type_for (TREE_TYPE (arg0));
20071 /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
20072 instead of ABS_EXPR to handle overflow case(TYPE_MIN). */
20073 ures = gimple_build (seq: &stmts, code: ABSU_EXPR, type: utype, ops: arg0);
20074 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20075 loc = gimple_location (g: stmt);
20076 vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
20077 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
20078 VIEW_CONVERT_EXPR, vce);
20079 gsi_replace (gsi, g, false);
20080 }
20081 return true;
20082
20083 case IX86_BUILTIN_MINPS:
20084 case IX86_BUILTIN_MINPD:
20085 case IX86_BUILTIN_MINPS256:
20086 case IX86_BUILTIN_MINPD256:
20087 case IX86_BUILTIN_MINPS512:
20088 case IX86_BUILTIN_MINPD512:
20089 case IX86_BUILTIN_MINPS128_MASK:
20090 case IX86_BUILTIN_MINPD128_MASK:
20091 case IX86_BUILTIN_MINPS256_MASK:
20092 case IX86_BUILTIN_MINPD256_MASK:
20093 case IX86_BUILTIN_MINPH128_MASK:
20094 case IX86_BUILTIN_MINPH256_MASK:
20095 case IX86_BUILTIN_MINPH512_MASK:
20096 tcode = LT_EXPR;
20097 goto do_minmax;
20098
20099 case IX86_BUILTIN_MAXPS:
20100 case IX86_BUILTIN_MAXPD:
20101 case IX86_BUILTIN_MAXPS256:
20102 case IX86_BUILTIN_MAXPD256:
20103 case IX86_BUILTIN_MAXPS512:
20104 case IX86_BUILTIN_MAXPD512:
20105 case IX86_BUILTIN_MAXPS128_MASK:
20106 case IX86_BUILTIN_MAXPD128_MASK:
20107 case IX86_BUILTIN_MAXPS256_MASK:
20108 case IX86_BUILTIN_MAXPD256_MASK:
20109 case IX86_BUILTIN_MAXPH128_MASK:
20110 case IX86_BUILTIN_MAXPH256_MASK:
20111 case IX86_BUILTIN_MAXPH512_MASK:
20112 tcode = GT_EXPR;
20113 do_minmax:
20114 gcc_assert (n_args >= 2);
20115 /* Without SSE4.1 we often aren't able to pattern match it back to the
20116 desired instruction. */
20117 if (!gimple_call_lhs (gs: stmt) || !optimize || !TARGET_SSE4_1)
20118 break;
20119 arg0 = gimple_call_arg (gs: stmt, index: 0);
20120 arg1 = gimple_call_arg (gs: stmt, index: 1);
20121 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
20122 /* For masked minmax, only optimize if the mask is all ones. */
20123 if (n_args > 2
20124 && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: 3)))
20125 break;
20126 if (n_args >= 5)
20127 {
20128 tree arg4 = gimple_call_arg (gs: stmt, index: 4);
20129 if (!tree_fits_uhwi_p (arg4))
20130 break;
20131 if (tree_to_uhwi (arg4) == 4)
20132 /* Ok. */;
20133 else if (tree_to_uhwi (arg4) != 8)
20134 /* Invalid round argument. */
20135 break;
20136 else if (HONOR_NANS (arg0))
20137 /* Lowering to comparison would raise exceptions which
20138 shouldn't be raised. */
20139 break;
20140 }
20141 {
20142 tree type = truth_type_for (TREE_TYPE (arg0));
20143 tree cmpres = gimple_build (seq: &stmts, code: tcode, type, ops: arg0, ops: arg1);
20144 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
20145 g = gimple_build_assign (gimple_call_lhs (gs: stmt),
20146 VEC_COND_EXPR, cmpres, arg0, arg1);
20147 gsi_replace (gsi, g, false);
20148 }
20149 return true;
20150
20151 default:
20152 break;
20153 }
20154
20155 return false;
20156}
20157
20158/* Handler for an SVML-style interface to
20159 a library with vectorized intrinsics. */
20160
tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  /* Buffer for the constructed SVML routine name, e.g. "vmlsSin4".  */
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  /* Input and output vectors must agree in element mode and count.  */
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      /* SVML entry points exist only for 2 x double and 4 x float.  */
      if ((el_mode != DFmode || n != 2)
	  && (el_mode != SFmode || n != 4))
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  /* SVML names the log routines irregularly ("Ln" rather than "Log"),
     so handle those specially.  Otherwise derive the name from the
     scalar builtin's name with the "__builtin_" prefix (10 characters)
     stripped; for the float variant the trailing 'f' of the builtin
     name is overwritten with the vector width '4'.  */
  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase (the first letter of the math function name,
     at offset 4 after the "vmls"/"vmld" prefix).  */
  name[4] &= ~0x20;

  /* Count the scalar builtin's arguments to choose between the unary
     and binary vector function type.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
20250
20251/* Handler for an ACML-style interface to
20252 a library with vectorized intrinsics. */
20253
tree
ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
{
  /* Template for the vectorized routine name, e.g. "__vrd2_sin".
     Positions 4 and 5 ("..") are filled in below with the precision
     letter and the vector width digit.  */
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  /* Input and output vectors must agree in element mode and count.  */
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_SIN:
    CASE_CFN_COS:
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG2:
    CASE_CFN_LOG10:
      /* ACML provides only 2 x double and 4 x float variants.  */
      if (el_mode == DFmode && n == 2)
	{
	  name[4] = 'd';
	  name[5] = '2';
	}
      else if (el_mode == SFmode && n == 4)
	{
	  name[4] = 's';
	  name[5] = '4';
	}
      else
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* Append the math function name, stripping the "__builtin_" prefix
     (10 characters) from the scalar builtin's name.  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + 7, "%s", bname+10);

  /* Count the scalar builtin's arguments to choose between the unary
     and binary vector function type.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
20329
20330/* Handler for an AOCL-LibM-style interface to
20331 a library with vectorized intrinsics. */
20332
tree
ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
{
  /* Routine names take the form amd_vr[ds]<N>_<func>,
     e.g. "amd_vrd2_sin"; start from the common prefix.  */
  char name[20] = "amd_vr";
  int name_len = 6;
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* AOCL-LibM is 64bits only.  It is also only suitable for unsafe math only
     as it trades off some accuracy for increased performance.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  /* Input and output vectors must agree in element mode and count.  */
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  gcc_checking_assert (n > 0);

  /* Decide whether there exists a function for the combination of FN, the mode
     and the vector width.  Return early if it doesn't.  */

  if (el_mode != DFmode && el_mode != SFmode)
    return NULL_TREE;

  /* Supported vector widths for given FN and single/double precision.  Zeros
     are used to fill out unused positions in the arrays.
     NOTE: row order must match the switch over FN below.  */
  static const int supported_n[][2][3] = {
    /* Single prec. ,  Double prec.  */
    { {  16,  0,  0 }, {  2,  4,  8 } }, /* TAN.  */
    { {  4,  8,  16 }, {  2,  4,  8 } }, /* EXP.  */
    { {  4,  8,  16 }, {  2,  4,  8 } }, /* EXP2.  */
    { {  4,  8,  16 }, {  2,  4,  8 } }, /* LOG.  */
    { {  4,  8,  16 }, {  2,  4,  8 } }, /* LOG2.  */
    { {  4,  8,  16 }, {  2,  4,  8 } }, /* COS.  */
    { {  4,  8,  16 }, {  2,  4,  8 } }, /* SIN.  */
    { {  4,  8,  16 }, {  2,  4,  8 } }, /* POW.  */
    { {  4,  8,  16 }, {  2,  4,  8 } }, /* ERF.  */
    { {  4,  8,  16 }, {  2,  8,  0 } }, /* ATAN.  */
    { {  4,  8,  16 }, {  2,  0,  0 } }, /* LOG10.  */
    { {  4,  0,   0 }, {  2,  0,  0 } }, /* EXP10.  */
    { {  4,  0,   0 }, {  2,  0,  0 } }, /* LOG1P.  */
    { {  4,  8,  16 }, {  8,  0,  0 } }, /* ASIN.  */
    { {  4,  16,  0 }, {  0,  0,  0 } }, /* ACOS.  */
    { {  4,  8,  16 }, {  0,  0,  0 } }, /* TANH.  */
    { {  4,  0,   0 }, {  0,  0,  0 } }, /* EXPM1.  */
    { {  4,  8,   0 }, {  0,  0,  0 } }, /* COSH.  */
  };

  /* We cannot simply index the supported_n array with FN since multiple FNs
     may correspond to a single operation (see the definitions of these
     CASE_CFN_* macros).  */
  int i;
  switch (fn)
    {
    CASE_CFN_TAN : i = 0; break;
    CASE_CFN_EXP : i = 1; break;
    CASE_CFN_EXP2 : i = 2; break;
    CASE_CFN_LOG : i = 3; break;
    CASE_CFN_LOG2 : i = 4; break;
    CASE_CFN_COS : i = 5; break;
    CASE_CFN_SIN : i = 6; break;
    CASE_CFN_POW : i = 7; break;
    CASE_CFN_ERF : i = 8; break;
    CASE_CFN_ATAN : i = 9; break;
    CASE_CFN_LOG10 : i = 10; break;
    CASE_CFN_EXP10 : i = 11; break;
    CASE_CFN_LOG1P : i = 12; break;
    CASE_CFN_ASIN : i = 13; break;
    CASE_CFN_ACOS : i = 14; break;
    CASE_CFN_TANH : i = 15; break;
    CASE_CFN_EXPM1 : i = 16; break;
    CASE_CFN_COSH : i = 17; break;
    default: return NULL_TREE;
    }

  /* Second index: 0 for single precision, 1 for double.  */
  int j = el_mode == DFmode;
  bool n_is_supported = false;
  for (unsigned k = 0; k < 3; k++)
    if (supported_n[i][j][k] == n)
      {
	n_is_supported = true;
	break;
      }
  if (!n_is_supported)
    return NULL_TREE;

  /* Append the precision and the vector width to the function name we are
     constructing.  */
  name[name_len++] = el_mode == DFmode ? 'd' : 's';
  switch (n)
    {
    case 2:
    case 4:
    case 8:
      name[name_len++] = '0' + n;
      break;
    case 16:
      name[name_len++] = '1';
      name[name_len++] = '6';
      break;
    default:
      gcc_unreachable ();
    }
  name[name_len++] = '_';

  /* Append the operation name (steal it from the name of a builtin,
     skipping its "__builtin_" prefix of 10 characters).  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + name_len, "%s", bname + 10);

  /* Count the scalar builtin's arguments to choose between the unary
     and binary vector function type.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
20471
20472/* Returns a decl of a function that implements scatter store with
20473 register type VECTYPE and index type INDEX_TYPE and SCALE.
20474 Return NULL_TREE if it is not available. */
20475
20476static tree
20477ix86_vectorize_builtin_scatter (const_tree vectype,
20478 const_tree index_type, int scale)
20479{
20480 bool si;
20481 enum ix86_builtins code;
20482
20483 if (!TARGET_AVX512F)
20484 return NULL_TREE;
20485
20486 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
20487 ? !TARGET_USE_SCATTER_2PARTS
20488 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
20489 ? !TARGET_USE_SCATTER_4PARTS
20490 : !TARGET_USE_SCATTER_8PARTS))
20491 return NULL_TREE;
20492
20493 if ((TREE_CODE (index_type) != INTEGER_TYPE
20494 && !POINTER_TYPE_P (index_type))
20495 || (TYPE_MODE (index_type) != SImode
20496 && TYPE_MODE (index_type) != DImode))
20497 return NULL_TREE;
20498
20499 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
20500 return NULL_TREE;
20501
20502 /* v*scatter* insn sign extends index to pointer mode. */
20503 if (TYPE_PRECISION (index_type) < POINTER_SIZE
20504 && TYPE_UNSIGNED (index_type))
20505 return NULL_TREE;
20506
20507 /* Scale can be 1, 2, 4 or 8. */
20508 if (scale <= 0
20509 || scale > 8
20510 || (scale & (scale - 1)) != 0)
20511 return NULL_TREE;
20512
20513 si = TYPE_MODE (index_type) == SImode;
20514 switch (TYPE_MODE (vectype))
20515 {
20516 case E_V8DFmode:
20517 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
20518 break;
20519 case E_V8DImode:
20520 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
20521 break;
20522 case E_V16SFmode:
20523 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
20524 break;
20525 case E_V16SImode:
20526 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
20527 break;
20528 case E_V4DFmode:
20529 if (TARGET_AVX512VL)
20530 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
20531 else
20532 return NULL_TREE;
20533 break;
20534 case E_V4DImode:
20535 if (TARGET_AVX512VL)
20536 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
20537 else
20538 return NULL_TREE;
20539 break;
20540 case E_V8SFmode:
20541 if (TARGET_AVX512VL)
20542 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
20543 else
20544 return NULL_TREE;
20545 break;
20546 case E_V8SImode:
20547 if (TARGET_AVX512VL)
20548 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
20549 else
20550 return NULL_TREE;
20551 break;
20552 case E_V2DFmode:
20553 if (TARGET_AVX512VL)
20554 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
20555 else
20556 return NULL_TREE;
20557 break;
20558 case E_V2DImode:
20559 if (TARGET_AVX512VL)
20560 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
20561 else
20562 return NULL_TREE;
20563 break;
20564 case E_V4SFmode:
20565 if (TARGET_AVX512VL)
20566 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
20567 else
20568 return NULL_TREE;
20569 break;
20570 case E_V4SImode:
20571 if (TARGET_AVX512VL)
20572 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
20573 else
20574 return NULL_TREE;
20575 break;
20576 default:
20577 return NULL_TREE;
20578 }
20579
20580 return get_ix86_builtin (c: code);
20581}
20582
20583/* Return true if it is safe to use the rsqrt optabs to optimize
20584 1.0/sqrt. */
20585
20586static bool
20587use_rsqrt_p (machine_mode mode)
20588{
20589 return ((mode == HFmode
20590 || (TARGET_SSE && TARGET_SSE_MATH))
20591 && flag_finite_math_only
20592 && !flag_trapping_math
20593 && flag_unsafe_math_optimizations);
20594}
20595
20596/* Helper for avx_vpermilps256_operand et al. This is also used by
20597 the expansion functions to turn the parallel back into a mask.
20598 The return value is 0 for no match and the imm8+1 for a match. */
20599
int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  /* The parallel must supply exactly one selector per vector element.  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  /* NOTE: the cases below deliberately fall through from wider to
     narrower modes, each validating/encoding its extra elements and
     then sharing the common tail.  */
  switch (mode)
    {
    case E_V8DFmode:
    case E_V8DImode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
      for (i = 4; i < 6; ++i)
	{
	  if (ipar[i] < 4 || ipar[i] >= 6)
	    return 0;
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  if (ipar[i] < 6)
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case E_V4DFmode:
    case E_V4DImode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case E_V16SFmode:
    case E_V16SImode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case E_V8SFmode:
    case E_V8SImode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      /* Only the low-lane selectors remain to be encoded below.  */
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V4SImode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  Each selector takes
	 nelt/2 bits of the immediate (2 bits for SF, 1 bit for DF).  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
20702
20703/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
20704 the expansion functions to turn the parallel back into a mask.
20705 The return value is 0 for no match and the imm8+1 for a match. */
20706
20707int
20708avx_vperm2f128_parallel (rtx par, machine_mode mode)
20709{
20710 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20711 unsigned mask = 0;
20712 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20713
20714 if (XVECLEN (par, 0) != (int) nelt)
20715 return 0;
20716
20717 /* Validate that all of the elements are constants, and not totally
20718 out of range. Copy the data into an integral array to make the
20719 subsequent checks easier. */
20720 for (i = 0; i < nelt; ++i)
20721 {
20722 rtx er = XVECEXP (par, 0, i);
20723 unsigned HOST_WIDE_INT ei;
20724
20725 if (!CONST_INT_P (er))
20726 return 0;
20727 ei = INTVAL (er);
20728 if (ei >= 2 * nelt)
20729 return 0;
20730 ipar[i] = ei;
20731 }
20732
20733 /* Validate that the halves of the permute are halves. */
20734 for (i = 0; i < nelt2 - 1; ++i)
20735 if (ipar[i] + 1 != ipar[i + 1])
20736 return 0;
20737 for (i = nelt2; i < nelt - 1; ++i)
20738 if (ipar[i] + 1 != ipar[i + 1])
20739 return 0;
20740
20741 /* Reconstruct the mask. */
20742 for (i = 0; i < 2; ++i)
20743 {
20744 unsigned e = ipar[i * nelt2];
20745 if (e % nelt2)
20746 return 0;
20747 e /= nelt2;
20748 mask |= e << (i * 4);
20749 }
20750
20751 /* Make sure success has a non-zero value by adding one. */
20752 return mask + 1;
20753}
20754
20755/* Return a mask of VPTERNLOG operands that do not affect output. */
20756
20757int
20758vpternlog_redundant_operand_mask (rtx pternlog_imm)
20759{
20760 int mask = 0;
20761 int imm8 = INTVAL (pternlog_imm);
20762
20763 if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20764 mask |= 1;
20765 if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20766 mask |= 2;
20767 if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20768 mask |= 4;
20769
20770 return mask;
20771}
20772
20773/* Eliminate false dependencies on operands that do not affect output
20774 by substituting other operands of a VPTERNLOG. */
20775
void
substitute_vpternlog_operands (rtx *operands)
{
  /* operands[4] is the VPTERNLOG immediate; bits 1/2/4 of MASK flag
     operands 1/2/3 respectively as not affecting the result.  */
  int mask = vpternlog_redundant_operand_mask (operands[4]);

  if (mask & 1) /* The first operand is redundant.  */
    operands[1] = operands[2];

  if (mask & 2) /* The second operand is redundant.  */
    operands[2] = operands[1];

  if (mask & 4) /* The third operand is redundant.  */
    operands[3] = operands[1];
  else if (REG_P (operands[3]))
    {
      /* The third operand is live and in a register: prefer it as the
	 replacement for any redundant operand, overriding the
	 substitutions made above, to avoid false dependencies.  */
      if (mask & 1)
	operands[1] = operands[3];
      if (mask & 2)
	operands[2] = operands[3];
    }
}
20797
20798/* Return a register priority for hard reg REGNO. */
20799static int
20800ix86_register_priority (int hard_regno)
20801{
20802 /* ebp and r13 as the base always wants a displacement, r12 as the
20803 base always wants an index. So discourage their usage in an
20804 address. */
20805 if (hard_regno == R12_REG || hard_regno == R13_REG)
20806 return 0;
20807 if (hard_regno == BP_REG)
20808 return 1;
20809 /* New x86-64 int registers result in bigger code size. Discourage them. */
20810 if (REX_INT_REGNO_P (hard_regno))
20811 return 2;
20812 if (REX2_INT_REGNO_P (hard_regno))
20813 return 2;
20814 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
20815 if (REX_SSE_REGNO_P (hard_regno))
20816 return 2;
20817 if (EXT_REX_SSE_REGNO_P (hard_regno))
20818 return 1;
20819 /* Usage of AX register results in smaller code. Prefer it. */
20820 if (hard_regno == AX_REG)
20821 return 4;
20822 return 3;
20823}
20824
20825/* Implement TARGET_PREFERRED_RELOAD_CLASS.
20826
20827 Put float CONST_DOUBLE in the constant pool instead of fp regs.
20828 QImode must go into class Q_REGS.
20829 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20830 movdf to do mem-to-mem moves through integer regs. */
20831
static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
	  || MAYBE_SSE_CLASS_P (regclass)
	  || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (CONST_DOUBLE_P (x))
    {
      /* General regs can load everything.  */
      if (INTEGER_CLASS_P (regclass))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (IS_STACK_MODE (mode)
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to FP regs.  */
	  if (FLOAT_CLASS_P (regclass))
	    return FLOAT_REGS;
	}

      /* Any other FP constant must come from memory.  */
      return NO_REGS;
    }

  /* Prefer SSE if we can use them for math.  Also allow integer regs
     when moves between register units are cheap.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    {
      /* Word-size-or-smaller values can live in GPRs too, but only when
	 GPR<->XMM moves are cheap in both directions on this target.  */
      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
	  && TARGET_INTER_UNIT_MOVES_TO_VEC
	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
	return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
      else
	return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS or ALL_MASK_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (Q_CLASS_P (regclass))
	return regclass;
      /* Narrow to Q_REGS when it is a subset of the requested class.  */
      else if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      else if (MASK_CLASS_P (regclass))
	return regclass;
      else
	return NO_REGS;
    }

  /* Anything else: the requested class is fine as-is.  */
  return regclass;
}
20910
20911/* Discourage putting floating-point values in SSE registers unless
20912 SSE math is being used, and likewise for the 387 registers. */
20913static reg_class_t
20914ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
20915{
20916 /* Restrict the output reload class to the register bank that we are doing
20917 math on. If we would like not to return a subset of CLASS, reject this
20918 alternative: if reload cannot do this, it will still use its choice. */
20919 machine_mode mode = GET_MODE (x);
20920 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20921 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
20922
20923 if (IS_STACK_MODE (mode))
20924 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20925
20926 return regclass;
20927}
20928
/* Implement TARGET_SECONDARY_RELOAD.  Return the class needed for an
   intermediate register (or NO_REGS), optionally describing a reload
   pattern / extra cost via SRI.  */

static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
	   && INTEGER_CLASS_P (rclass)
	   && MAYBE_NON_Q_CLASS_P (rclass))
	  || (!TARGET_AVX512DQ
	      && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;

      return NO_REGS;
    }

  /* Require movement to gpr, and then store to memory.  */
  if ((mode == HFmode || mode == HImode || mode == V2QImode
       || mode == BFmode)
      && !TARGET_SSE4_1
      && SSE_CLASS_P (rclass)
      && !in_p && MEM_P (x))
    {
      sri->extra_cost = 1;
      return GENERAL_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	                               (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	  (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  /* Default: no secondary reload register needed.  */
  return NO_REGS;
}
21020
21021/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
21022
21023static bool
21024ix86_class_likely_spilled_p (reg_class_t rclass)
21025{
21026 switch (rclass)
21027 {
21028 case AREG:
21029 case DREG:
21030 case CREG:
21031 case BREG:
21032 case AD_REGS:
21033 case SIREG:
21034 case DIREG:
21035 case SSE_FIRST_REG:
21036 case FP_TOP_REG:
21037 case FP_SECOND_REG:
21038 return true;
21039
21040 default:
21041 break;
21042 }
21043
21044 return false;
21045}
21046
21047/* Implement TARGET_CALLEE_SAVE_COST. */
21048
21049static int
21050ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
21051 unsigned int, int mem_cost, const HARD_REG_SET &, bool)
21052{
21053 /* Account for the fact that push and pop are shorter and do their
21054 own allocation and deallocation. */
21055 if (GENERAL_REGNO_P (hard_regno))
21056 {
21057 /* push is 1 byte while typical spill is 4-5 bytes.
21058 ??? We probably should adjust size costs accordingly.
21059 Costs are relative to reg-reg move that has 2 bytes for 32bit
21060 and 3 bytes otherwise. Be sure that no cost table sets cost
21061 to 2, so we end up with 0. */
21062 if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
21063 return 1;
21064 return mem_cost - 2;
21065 }
21066 return mem_cost;
21067}
21068
21069/* Return true if a set of DST by the expression SRC should be allowed.
21070 This prevents complex sets of likely_spilled hard regs before split1. */
21071
21072bool
21073ix86_hardreg_mov_ok (rtx dst, rtx src)
21074{
21075 /* Avoid complex sets of likely_spilled hard registers before reload. */
21076 if (REG_P (dst) && HARD_REGISTER_P (dst)
21077 && !REG_P (src) && !MEM_P (src)
21078 && !(VECTOR_MODE_P (GET_MODE (dst))
21079 ? standard_sse_constant_p (x: src, GET_MODE (dst))
21080 : x86_64_immediate_operand (src, GET_MODE (dst)))
21081 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
21082 && ix86_pre_reload_split ())
21083 return false;
21084 return true;
21085}
21086
21087/* If we are copying between registers from different register sets
21088 (e.g. FP and integer), we may need a memory location.
21089
21090 The function can't work reliably when one of the CLASSES is a class
21091 containing registers from multiple sets. We avoid this by never combining
21092 different sets in a single alternative in the machine description.
21093 Ensure that this constraint holds to avoid unexpected surprises.
21094
21095 When STRICT is false, we are being called from REGISTER_MOVE_COST,
21096 so do not enforce these sanity checks.
21097
21098 To optimize register_move_cost performance, define inline variant. */
21099
static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
				reg_class_t class2, int strict)
{
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  /* A "maybe" class mixing registers from several sets violates the
     single-set-per-alternative invariant described above; with STRICT
     that is a bug, otherwise (cost queries) just report "needs memory".  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  /* x87 <-> anything else always goes through memory.  */
  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* Direct SSE moves exist only to/from general registers.  */
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* With SSE4.1, *mov{ti,di}_internal supports moves between
	 SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}.  */
      if (TARGET_SSE4_1
	  && (TARGET_64BIT ? mode == TImode : mode == DImode))
	return false;

      int msize = GET_MODE_SIZE (mode);

      /* Between SSE and general, we have moves no larger than word size.  */
      if (msize > UNITS_PER_WORD)
	return true;

      /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
	 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
      int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);

      if (msize < minsize)
	return true;
    }

  /* Same register set (or a supported cross-set move): no memory needed.  */
  return false;
}
21174
21175/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
21176
21177static bool
21178ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21179 reg_class_t class2)
21180{
21181 return inline_secondary_memory_needed (mode, class1, class2, strict: true);
21182}
21183
21184/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
21185
21186 get_secondary_mem widens integral modes to BITS_PER_WORD.
21187 There is no need to emit full 64 bit move on 64 bit targets
21188 for integral modes that can be moved using 32 bit move. */
21189
21190static machine_mode
21191ix86_secondary_memory_needed_mode (machine_mode mode)
21192{
21193 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
21194 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
21195 return mode;
21196}
21197
21198/* Implement the TARGET_CLASS_MAX_NREGS hook.
21199
21200 On the 80386, this is the size of MODE in words,
21201 except in the FP regs, where a single reg is always enough. */
21202
21203static unsigned char
21204ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
21205{
21206 if (MAYBE_INTEGER_CLASS_P (rclass))
21207 {
21208 if (mode == XFmode)
21209 return (TARGET_64BIT ? 2 : 3);
21210 else if (mode == XCmode)
21211 return (TARGET_64BIT ? 4 : 6);
21212 else
21213 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21214 }
21215 else
21216 {
21217 if (COMPLEX_MODE_P (mode))
21218 return 2;
21219 else
21220 return 1;
21221 }
21222}
21223
21224/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
21225
21226static bool
21227ix86_can_change_mode_class (machine_mode from, machine_mode to,
21228 reg_class_t regclass)
21229{
21230 if (from == to)
21231 return true;
21232
21233 /* x87 registers can't do subreg at all, as all values are reformatted
21234 to extended precision.
21235
21236 ??? middle-end queries mode changes for ALL_REGS and this makes
21237 vec_series_lowpart_p to always return false. We probably should
21238 restrict this to modes supported by i387 and check if it is enabled. */
21239 if (MAYBE_FLOAT_CLASS_P (regclass))
21240 return false;
21241
21242 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21243 {
21244 /* Vector registers do not support QI or HImode loads. If we don't
21245 disallow a change to these modes, reload will assume it's ok to
21246 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21247 the vec_dupv4hi pattern.
21248 NB: SSE2 can load 16bit data to sse register via pinsrw. */
21249 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
21250 if (GET_MODE_SIZE (from) < mov_size
21251 || GET_MODE_SIZE (to) < mov_size)
21252 return false;
21253 }
21254
21255 return true;
21256}
21257
21258/* Return index of MODE in the sse load/store tables. */
21259
21260static inline int
21261sse_store_index (machine_mode mode)
21262{
21263 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
21264 costs to processor_costs, which requires changes to all entries in
21265 processor cost table. */
21266 if (mode == E_HFmode)
21267 mode = E_SFmode;
21268
21269 switch (GET_MODE_SIZE (mode))
21270 {
21271 case 4:
21272 return 0;
21273 case 8:
21274 return 1;
21275 case 16:
21276 return 2;
21277 case 32:
21278 return 3;
21279 case 64:
21280 return 4;
21281 default:
21282 return -1;
21283 }
21284}
21285
21286/* Return the cost of moving data of mode M between a
21287 register and memory. A value of 2 is the default; this cost is
21288 relative to those in `REGISTER_MOVE_COST'.
21289
21290 This function is used extensively by register_move_cost that is used to
21291 build tables at startup. Make it inline in this case.
21292 When IN is 2, return maximum of in and out move cost.
21293
21294 If moving between registers and memory is more expensive than
21295 between two registers, you should define this macro to express the
21296 relative cost.
21297
21298 Model also increased moving costs of QImode registers in non
21299 Q_REGS classes.
21300 */
21301static inline int
21302inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
21303{
21304 int cost;
21305
21306 if (FLOAT_CLASS_P (regclass))
21307 {
21308 int index;
21309 switch (mode)
21310 {
21311 case E_SFmode:
21312 index = 0;
21313 break;
21314 case E_DFmode:
21315 index = 1;
21316 break;
21317 case E_XFmode:
21318 index = 2;
21319 break;
21320 default:
21321 return 100;
21322 }
21323 if (in == 2)
21324 return MAX (ix86_cost->hard_register.fp_load [index],
21325 ix86_cost->hard_register.fp_store [index]);
21326 return in ? ix86_cost->hard_register.fp_load [index]
21327 : ix86_cost->hard_register.fp_store [index];
21328 }
21329 if (SSE_CLASS_P (regclass))
21330 {
21331 int index = sse_store_index (mode);
21332 if (index == -1)
21333 return 100;
21334 if (in == 2)
21335 return MAX (ix86_cost->hard_register.sse_load [index],
21336 ix86_cost->hard_register.sse_store [index]);
21337 return in ? ix86_cost->hard_register.sse_load [index]
21338 : ix86_cost->hard_register.sse_store [index];
21339 }
21340 if (MASK_CLASS_P (regclass))
21341 {
21342 int index;
21343 switch (GET_MODE_SIZE (mode))
21344 {
21345 case 1:
21346 index = 0;
21347 break;
21348 case 2:
21349 index = 1;
21350 break;
21351 /* DImode loads and stores assumed to cost the same as SImode. */
21352 case 4:
21353 case 8:
21354 index = 2;
21355 break;
21356 default:
21357 return 100;
21358 }
21359
21360 if (in == 2)
21361 return MAX (ix86_cost->hard_register.mask_load[index],
21362 ix86_cost->hard_register.mask_store[index]);
21363 return in ? ix86_cost->hard_register.mask_load[2]
21364 : ix86_cost->hard_register.mask_store[2];
21365 }
21366 if (MMX_CLASS_P (regclass))
21367 {
21368 int index;
21369 switch (GET_MODE_SIZE (mode))
21370 {
21371 case 4:
21372 index = 0;
21373 break;
21374 case 8:
21375 index = 1;
21376 break;
21377 default:
21378 return 100;
21379 }
21380 if (in == 2)
21381 return MAX (ix86_cost->hard_register.mmx_load [index],
21382 ix86_cost->hard_register.mmx_store [index]);
21383 return in ? ix86_cost->hard_register.mmx_load [index]
21384 : ix86_cost->hard_register.mmx_store [index];
21385 }
21386 switch (GET_MODE_SIZE (mode))
21387 {
21388 case 1:
21389 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21390 {
21391 if (!in)
21392 return ix86_cost->hard_register.int_store[0];
21393 if (TARGET_PARTIAL_REG_DEPENDENCY
21394 && optimize_function_for_speed_p (cfun))
21395 cost = ix86_cost->hard_register.movzbl_load;
21396 else
21397 cost = ix86_cost->hard_register.int_load[0];
21398 if (in == 2)
21399 return MAX (cost, ix86_cost->hard_register.int_store[0]);
21400 return cost;
21401 }
21402 else
21403 {
21404 if (in == 2)
21405 return MAX (ix86_cost->hard_register.movzbl_load,
21406 ix86_cost->hard_register.int_store[0] + 4);
21407 if (in)
21408 return ix86_cost->hard_register.movzbl_load;
21409 else
21410 return ix86_cost->hard_register.int_store[0] + 4;
21411 }
21412 break;
21413 case 2:
21414 {
21415 int cost;
21416 if (in == 2)
21417 cost = MAX (ix86_cost->hard_register.int_load[1],
21418 ix86_cost->hard_register.int_store[1]);
21419 else
21420 cost = in ? ix86_cost->hard_register.int_load[1]
21421 : ix86_cost->hard_register.int_store[1];
21422
21423 if (mode == E_HFmode)
21424 {
21425 /* Prefer SSE over GPR for HFmode. */
21426 int sse_cost;
21427 int index = sse_store_index (mode);
21428 if (in == 2)
21429 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
21430 ix86_cost->hard_register.sse_store[index]);
21431 else
21432 sse_cost = (in
21433 ? ix86_cost->hard_register.sse_load [index]
21434 : ix86_cost->hard_register.sse_store [index]);
21435 if (sse_cost >= cost)
21436 cost = sse_cost + 1;
21437 }
21438 return cost;
21439 }
21440 default:
21441 if (in == 2)
21442 cost = MAX (ix86_cost->hard_register.int_load[2],
21443 ix86_cost->hard_register.int_store[2]);
21444 else if (in)
21445 cost = ix86_cost->hard_register.int_load[2];
21446 else
21447 cost = ix86_cost->hard_register.int_store[2];
21448 /* Multiply with the number of GPR moves needed. */
21449 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
21450 }
21451}
21452
21453static int
21454ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
21455{
21456 return inline_memory_move_cost (mode, regclass: (enum reg_class) regclass, in: in ? 1 : 0);
21457}
21458
21459
21460/* Return the cost of moving data from a register in class CLASS1 to
21461 one in class CLASS2.
21462
21463 It is not required that the cost always equal 2 when FROM is the same as TO;
21464 on some machines it is expensive to move between registers if they are not
21465 general registers. */
21466
21467static int
21468ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
21469 reg_class_t class2_i)
21470{
21471 enum reg_class class1 = (enum reg_class) class1_i;
21472 enum reg_class class2 = (enum reg_class) class2_i;
21473
21474 /* In case we require secondary memory, compute cost of the store followed
21475 by load. In order to avoid bad register allocation choices, we need
21476 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21477
21478 if (inline_secondary_memory_needed (mode, class1, class2, strict: false))
21479 {
21480 int cost = 1;
21481
21482 cost += inline_memory_move_cost (mode, regclass: class1, in: 2);
21483 cost += inline_memory_move_cost (mode, regclass: class2, in: 2);
21484
21485 /* In case of copying from general_purpose_register we may emit multiple
21486 stores followed by single load causing memory size mismatch stall.
21487 Count this as arbitrarily high cost of 20. */
21488 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
21489 && TARGET_MEMORY_MISMATCH_STALL
21490 && targetm.class_max_nregs (class1, mode)
21491 > targetm.class_max_nregs (class2, mode))
21492 cost += 20;
21493
21494 /* In the case of FP/MMX moves, the registers actually overlap, and we
21495 have to switch modes in order to treat them differently. */
21496 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21497 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21498 cost += 20;
21499
21500 return cost;
21501 }
21502
21503 /* Moves between MMX and non-MMX units require secondary memory. */
21504 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21505 gcc_unreachable ();
21506
21507 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21508 return (SSE_CLASS_P (class1)
21509 ? ix86_cost->hard_register.sse_to_integer
21510 : ix86_cost->hard_register.integer_to_sse);
21511
21512 /* Moves between mask register and GPR. */
21513 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
21514 {
21515 return (MASK_CLASS_P (class1)
21516 ? ix86_cost->hard_register.mask_to_integer
21517 : ix86_cost->hard_register.integer_to_mask);
21518 }
21519 /* Moving between mask registers. */
21520 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
21521 return ix86_cost->hard_register.mask_move;
21522
21523 if (MAYBE_FLOAT_CLASS_P (class1))
21524 return ix86_cost->hard_register.fp_move;
21525 if (MAYBE_SSE_CLASS_P (class1))
21526 {
21527 if (GET_MODE_BITSIZE (mode) <= 128)
21528 return ix86_cost->hard_register.xmm_move;
21529 if (GET_MODE_BITSIZE (mode) <= 256)
21530 return ix86_cost->hard_register.ymm_move;
21531 return ix86_cost->hard_register.zmm_move;
21532 }
21533 if (MAYBE_MMX_CLASS_P (class1))
21534 return ix86_cost->hard_register.mmx_move;
21535 return 2;
21536}
21537
21538/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
21539 words of a value of mode MODE but can be less for certain modes in
21540 special long registers.
21541
21542 Actually there are no two word move instructions for consecutive
21543 registers. And only registers 0-3 may have mov byte instructions
21544 applied to them. */
21545
21546static unsigned int
21547ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
21548{
21549 if (GENERAL_REGNO_P (regno))
21550 {
21551 if (mode == XFmode)
21552 return TARGET_64BIT ? 2 : 3;
21553 if (mode == XCmode)
21554 return TARGET_64BIT ? 4 : 6;
21555 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21556 }
21557 if (COMPLEX_MODE_P (mode))
21558 return 2;
21559 /* Register pair for mask registers. */
21560 if (mode == P2QImode || mode == P2HImode)
21561 return 2;
21562
21563 return 1;
21564}
21565
21566/* Implement REGMODE_NATURAL_SIZE(MODE). */
21567unsigned int
21568ix86_regmode_natural_size (machine_mode mode)
21569{
21570 if (mode == P2HImode || mode == P2QImode)
21571 return GET_MODE_SIZE (mode) / 2;
21572 return UNITS_PER_WORD;
21573}
21574
21575/* Implement TARGET_HARD_REGNO_MODE_OK. */
21576
static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  /* x87 stack registers hold only FP values.  */
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
	return MASK_PAIR_REGNO_P(regno);

      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
	      || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
    }

  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (VALID_AVX256_REG_OR_OI_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  /* Remaining registers are the general-purpose registers.  */
  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode)
	   || VALID_FP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
21672
21673/* Implement TARGET_INSN_CALLEE_ABI. */
21674
21675const predefined_function_abi &
21676ix86_insn_callee_abi (const rtx_insn *insn)
21677{
21678 unsigned int abi_id = 0;
21679 rtx pat = PATTERN (insn);
21680 if (vzeroupper_pattern (pat, VOIDmode))
21681 abi_id = ABI_VZEROUPPER;
21682
21683 return function_abis[abi_id];
21684}
21685
21686/* Initialize function_abis with corresponding abi_id,
21687 currently only handle vzeroupper. */
21688void
21689ix86_initialize_callee_abi (unsigned int abi_id)
21690{
21691 gcc_assert (abi_id == ABI_VZEROUPPER);
21692 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
21693 if (!vzeroupper_abi.initialized_p ())
21694 {
21695 HARD_REG_SET full_reg_clobbers;
21696 CLEAR_HARD_REG_SET (set&: full_reg_clobbers);
21697 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
21698 }
21699}
21700
21701void
21702ix86_expand_avx_vzeroupper (void)
21703{
21704 /* Initialize vzeroupper_abi here. */
21705 ix86_initialize_callee_abi (ABI_VZEROUPPER);
21706 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
21707 /* Return false for non-local goto in can_nonlocal_goto. */
21708 make_reg_eh_region_note (insn, ecf_flags: 0, INT_MIN);
21709 /* Flag used for call_insn indicates it's a fake call. */
21710 RTX_FLAG (insn, used) = 1;
21711}
21712
21713
21714/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
21715 saves SSE registers across calls is Win64 (thus no need to check the
21716 current ABI here), and with AVX enabled Win64 only guarantees that
21717 the low 16 bytes are saved. */
21718
21719static bool
21720ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
21721 machine_mode mode)
21722{
21723 /* Special ABI for vzeroupper which only clobber higher part of sse regs. */
21724 if (abi_id == ABI_VZEROUPPER)
21725 return (GET_MODE_SIZE (mode) > 16
21726 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
21727 || LEGACY_SSE_REGNO_P (regno)));
21728
21729 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
21730}
21731
21732/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21733 tieable integer mode. */
21734
21735static bool
21736ix86_tieable_integer_mode_p (machine_mode mode)
21737{
21738 switch (mode)
21739 {
21740 case E_HImode:
21741 case E_SImode:
21742 return true;
21743
21744 case E_QImode:
21745 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21746
21747 case E_DImode:
21748 return TARGET_64BIT;
21749
21750 default:
21751 return false;
21752 }
21753}
21754
/* Implement TARGET_MODES_TIEABLE_P.

   Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Identical modes are trivially tieable.  */
  if (mode1 == mode2)
    return true;

  /* Two integer modes that both live happily in general registers
     can always be tied.  */
  if (ix86_tieable_integer_mode_p (mode: mode1)
      && ix86_tieable_integer_mode_p (mode: mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any vector modes or scalar floating point modes acceptable to SSE
     registers, excluding scalar integer modes with SUBREG:
     (subreg:QI (reg:TI 99) 0))
     (subreg:HI (reg:TI 99) 0))
     (subreg:SI (reg:TI 99) 0))
     (subreg:DI (reg:TI 99) 0))
     to avoid unnecessary move from SSE register to integer register.
   */
  if (GET_MODE_SIZE (mode2) >= 16
      && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
	  || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
	      && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode1);

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode: mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode: mode1));

  /* SCmode and DImode can be tied (64-bit only).  */
  if ((mode1 == E_SCmode && mode2 == E_DImode)
      || (mode1 == E_DImode && mode2 == E_SCmode))
    return TARGET_64BIT;

  /* [SD]Cmode and V2[SD]Fmode modes can be tied.  */
  if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
      || (mode1 == E_V2SFmode && mode2 == E_SCmode)
      || (mode1 == E_DCmode && mode2 == E_V2DFmode)
      || (mode1 == E_V2DFmode && mode2 == E_DCmode))
    return true;

  return false;
}
21819
/* Return the cost of moving between two registers of mode MODE.

   UNITS is the widest chunk the target can move in one instruction
   for this mode class; the cost is one insn per UNITS-sized piece.  */

static int
ix86_set_reg_reg_cost (machine_mode mode)
{
  /* Default: moves go through general registers, a word at a time.  */
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      /* A flags-register copy is a single move regardless of width.  */
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      /* If the FP value fits an x87 or SSE register, it moves whole.  */
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      /* Likewise for complex FP values that fit the FP registers.  */
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      /* Vectors move whole when a wide-enough vector unit exists.  */
      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
	      && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
}
21867
21868/* Return cost of vector operation in MODE given that scalar version has
21869 COST. */
21870
21871static int
21872ix86_vec_cost (machine_mode mode, int cost)
21873{
21874 if (!VECTOR_MODE_P (mode))
21875 return cost;
21876
21877 if (GET_MODE_BITSIZE (mode) == 128
21878 && TARGET_SSE_SPLIT_REGS)
21879 return cost * GET_MODE_BITSIZE (mode) / 64;
21880 else if (GET_MODE_BITSIZE (mode) > 128
21881 && TARGET_AVX256_SPLIT_REGS)
21882 return cost * GET_MODE_BITSIZE (mode) / 128;
21883 else if (GET_MODE_BITSIZE (mode) > 256
21884 && TARGET_AVX512_SPLIT_REGS)
21885 return cost * GET_MODE_BITSIZE (mode) / 256;
21886 return cost;
21887}
21888
/* Return cost of vec_widen_<s>mult_hi/lo_<mode>;
   vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2.
   MODE is the mode of the multiplication operands; UNS_P says whether
   the widening is unsigned.  */
static int
ix86_widen_mult_cost (const struct processor_costs *cost,
		      enum machine_mode mode, bool uns_p)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
  /* Cost of the extra insns some variants need beyond the base
     mul+shuffle sequence.  */
  int extra_cost = 0;
  int basic_cost = 0;
  switch (mode)
    {
    case V8HImode:
    case V16HImode:
      /* Signed (or 256-bit) variants need two extra ops.  */
      if (!uns_p || mode == V16HImode)
	extra_cost = cost->sse_op * 2;
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    case V4SImode:
    case V8SImode:
      /* pmulhw/pmullw can be used. */
      basic_cost = cost->mulss * 2 + cost->sse_op * 2;
      break;
    case V2DImode:
      /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
	 require extra 4 mul, 4 add, 4 cmp and 2 shift. */
      if (!TARGET_SSE4_1 && !uns_p)
	extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
		     + cost->sse_op * 2;
      /* Fallthru. */
    case V4DImode:
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    default:
      /* Not implemented. */
      return 100;
    }
  return ix86_vec_cost (mode, cost: basic_cost + extra_cost);
}
21927
/* Return cost of multiplication in MODE.  Handles scalar FP (SSE or
   x87), vector FP, emulated and native vector integer, and scalar
   integer multiplies.  */

static int
ix86_multiplication_cost (const struct processor_costs *cost,
			  enum machine_mode mode)
{
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  /* Scalar FP done in SSE registers.  */
  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
    return inner_mode == DFmode ? cost->mulsd : cost->mulss;
  /* Scalar FP done on the x87 stack.  */
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fmul;
  /* Vector FP: element cost scaled by ix86_vec_cost.  */
  else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
			  cost: inner_mode == DFmode ? cost->mulsd : cost->mulss);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* Number of multiplies and of supporting shuffle/logic ops in
	 the emulation sequence.  */
      int nmults, nops;
      /* Cost of reading the memory. */
      int extra;

      switch (mode)
	{
	case V4QImode:
	case V8QImode:
	  /* Partial V*QImode is emulated with 4-6 insns. */
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (TARGET_AVX512BW && TARGET_AVX512VL)
	    ;
	  else if (TARGET_AVX2)
	    nops += 2;
	  else if (TARGET_XOP)
	    extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	  else
	    {
	      nops += 1;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  goto do_qimode;

	case V16QImode:
	  /* V*QImode is emulated with 4-11 insns. */
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
	    {
	      if (!(TARGET_AVX512BW && TARGET_AVX512VL))
		nops += 3;
	    }
	  else if (TARGET_XOP)
	    {
	      nmults += 1;
	      nops += 2;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  else
	    {
	      nmults += 1;
	      nops += 4;
	      extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
	    }
	  goto do_qimode;

	case V32QImode:
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
	    {
	      nmults += 1;
	      nops += 4;
	      /* 2 loads, so no division by 2. */
	      extra += COSTS_N_INSNS (cost->sse_load[3]);
	    }
	  goto do_qimode;

	case V64QImode:
	  nmults = 2;
	  nops = 9;
	  /* 2 loads of each size, so no division by 2. */
	  extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);

	do_qimode:
	  /* Common exit for all the QImode emulation cases above.  */
	  return ix86_vec_cost (mode, cost: cost->mulss * nmults
				      + cost->sse_op * nops) + extra;

	case V4SImode:
	  /* pmulld is used in this case. No emulation is needed. */
	  if (TARGET_SSE4_1)
	    goto do_native;
	  /* V4SImode is emulated with 7 insns. */
	  else
	    return ix86_vec_cost (mode, cost: cost->mulss * 2 + cost->sse_op * 5);

	case V2DImode:
	case V4DImode:
	  /* vpmullq is used in this case. No emulation is needed. */
	  if (TARGET_AVX512DQ && TARGET_AVX512VL)
	    goto do_native;
	  /* V*DImode is emulated with 6-8 insns. */
	  else if (TARGET_XOP && mode == V2DImode)
	    return ix86_vec_cost (mode, cost: cost->mulss * 2 + cost->sse_op * 4);
	  /* FALLTHRU */
	case V8DImode:
	  /* vpmullq is used in this case. No emulation is needed. */
	  if (TARGET_AVX512DQ && mode == V8DImode)
	    goto do_native;
	  else
	    return ix86_vec_cost (mode, cost: cost->mulss * 3 + cost->sse_op * 5);

	default:
	do_native:
	  /* A single native vector multiply.  */
	  return ix86_vec_cost (mode, cost: cost->mulss);
	}
    }
  else
    /* Scalar integer multiply: table-driven base cost plus an
       arbitrary per-bit estimate (7 set bits assumed).  */
    return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
}
22054
/* Return cost of division in MODE.  Handles scalar FP (SSE or x87),
   vector FP, and scalar integer divides.  */

static int
ix86_division_cost (const struct processor_costs *cost,
		    enum machine_mode mode)
{
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  /* Scalar FP divide in SSE registers.  */
  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
    return inner_mode == DFmode ? cost->divsd : cost->divss;
  /* Scalar FP divide on the x87 stack.  */
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fdiv;
  /* Vector FP: element cost scaled by ix86_vec_cost.  */
  else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
			  cost: inner_mode == DFmode ? cost->divsd : cost->divss);
  /* Scalar integer divide, cost table indexed by mode.  */
  else
    return cost->divide[MODE_INDEX (mode)];
}
22075
/* Return cost of shift/rotate CODE in MODE.
   If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND, and
   SHIFT_AND_TRUNCATE whether op1 is a SUBREG (of an AND).

   SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.
   NOTE(review): a non-null skip_op0 is assumed to imply a non-null
   skip_op1 — confirm against the callers.  */

static int
ix86_shift_rotate_cost (const struct processor_costs *cost,
			enum rtx_code code,
			enum machine_mode mode, bool constant_op1,
			HOST_WIDE_INT op1_val,
			bool and_in_op1,
			bool shift_and_truncate,
			bool *skip_op0, bool *skip_op1)
{
  if (skip_op0)
    *skip_op0 = *skip_op1 = false;

  /* Vector integer shifts: most are emulated by multi-insn
     sequences whose length depends on the ISA level.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* Number of sse_op-cost insns in the emulation sequence.  */
      int count;
      /* Cost of reading the memory. */
      int extra;

      switch (mode)
	{
	case V4QImode:
	case V8QImode:
	  if (TARGET_AVX2)
	    /* Use vpbroadcast. */
	    extra = cost->sse_op;
	  else
	    extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;

	  if (constant_op1)
	    {
	      if (code == ASHIFTRT)
		{
		  count = 4;
		  extra *= 2;
		}
	      else
		count = 2;
	    }
	  else if (TARGET_AVX512BW && TARGET_AVX512VL)
	    return ix86_vec_cost (mode, cost: cost->sse_op * 4);
	  else if (TARGET_SSE4_1)
	    count = 5;
	  else if (code == ASHIFTRT)
	    count = 6;
	  else
	    count = 5;
	  return ix86_vec_cost (mode, cost: cost->sse_op * count) + extra;

	case V16QImode:
	  if (TARGET_XOP)
	    {
	      /* For XOP we use vpshab, which requires a broadcast of the
		 value to the variable shift insn.  For constants this
		 means a V16Q const in mem; even when we can perform the
		 shift with one insn set the cost to prefer paddb. */
	      if (constant_op1)
		{
		  extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
		  return ix86_vec_cost (mode, cost: cost->sse_op) + extra;
		}
	      else
		{
		  count = (code == ASHIFT) ? 3 : 4;
		  return ix86_vec_cost (mode, cost: cost->sse_op * count);
		}
	    }
	  /* FALLTHRU */
	case V32QImode:
	  if (TARGET_GFNI && constant_op1)
	    {
	      /* Use vgf2p8affine.  One extra load for the mask, but in a loop
		 with enough registers it will be moved out.  So for now don't
		 account the constant mask load.  This is not quite right
		 for non loop vectorization. */
	      extra = 0;
	      return ix86_vec_cost (mode, cost: cost->sse_op) + extra;
	    }
	  if (TARGET_AVX2)
	    /* Use vpbroadcast. */
	    extra = cost->sse_op;
	  else
	    extra = COSTS_N_INSNS (mode == V16QImode
				   ? cost->sse_load[2]
				   : cost->sse_load[3]) / 2;

	  if (constant_op1)
	    {
	      if (code == ASHIFTRT)
		{
		  count = 4;
		  extra *= 2;
		}
	      else
		count = 2;
	    }
	  else if (TARGET_AVX512BW
		   && ((mode == V32QImode && !TARGET_PREFER_AVX256)
		       || (mode == V16QImode && TARGET_AVX512VL
			   && !TARGET_PREFER_AVX128)))
	    return ix86_vec_cost (mode, cost: cost->sse_op * 4);
	  else if (TARGET_AVX2
		   && mode == V16QImode && !TARGET_PREFER_AVX128)
	    count = 6;
	  else if (TARGET_SSE4_1)
	    count = 9;
	  else if (code == ASHIFTRT)
	    count = 10;
	  else
	    count = 9;
	  return ix86_vec_cost (mode, cost: cost->sse_op * count) + extra;

	case V64QImode:
	  /* Ignore the mask load for GF2P8AFFINEQB. */
	  extra = 0;
	  return ix86_vec_cost (mode, cost: cost->sse_op) + extra;

	case V2DImode:
	case V4DImode:
	  /* V*DImode arithmetic right shift is emulated. */
	  if (code == ASHIFTRT && !TARGET_AVX512VL)
	    {
	      if (constant_op1)
		{
		  /* Shifting in all sign bits is a compare/splat.  */
		  if (op1_val == 63)
		    count = TARGET_SSE4_2 ? 1 : 2;
		  else if (TARGET_XOP)
		    count = 2;
		  else if (TARGET_SSE4_1)
		    count = 3;
		  else
		    count = 4;
		}
	      else if (TARGET_XOP)
		count = 3;
	      else if (TARGET_SSE4_2)
		count = 4;
	      else
		count = 5;

	      return ix86_vec_cost (mode, cost: cost->sse_op * count);
	    }
	  /* FALLTHRU */
	default:
	  /* Everything else has a native vector shift.  */
	  return ix86_vec_cost (mode, cost: cost->sse_op);
	}
    }

  /* Double-word scalar shifts need a two-insn (or longer) sequence.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    {
      if (constant_op1)
	{
	  if (op1_val > 32)
	    return cost->shift_const + COSTS_N_INSNS (2);
	  else
	    return cost->shift_const * 2;
	}
      else
	{
	  if (and_in_op1)
	    return cost->shift_var * 2;
	  else
	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
	}
    }
  else
    {
      if (constant_op1)
	return cost->shift_const;
      else if (shift_and_truncate)
	{
	  if (skip_op0)
	    *skip_op0 = *skip_op1 = true;
	  /* Return the cost after shift-and truncation. */
	  return cost->shift_var;
	}
      else
	return cost->shift_var;
    }
}
22262
22263static int
22264ix86_insn_cost (rtx_insn *insn, bool speed)
22265{
22266 int insn_cost = 0;
22267 /* Add extra cost to avoid post_reload late_combine revert
22268 the optimization did in pass_rpad. */
22269 if (reload_completed
22270 && ix86_rpad_gate ()
22271 && recog_memoized (insn) >= 0
22272 && get_attr_avx_partial_xmm_update (insn)
22273 == AVX_PARTIAL_XMM_UPDATE_TRUE)
22274 insn_cost += COSTS_N_INSNS (3);
22275
22276 return insn_cost + pattern_cost (PATTERN (insn), speed);
22277}
22278
22279/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
22280
22281static int
22282vec_fp_conversion_cost (const struct processor_costs *cost, int size)
22283{
22284 if (size < 128)
22285 return cost->cvtss2sd;
22286 else if (size < 256)
22287 {
22288 if (TARGET_SSE_SPLIT_REGS)
22289 return cost->cvtss2sd * size / 64;
22290 return cost->cvtss2sd;
22291 }
22292 if (size < 512)
22293 return cost->vcvtps2pd256;
22294 else
22295 return cost->vcvtps2pd512;
22296}
22297
22298/* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */
22299
22300static bool
22301unspec_pcmp_p (rtx x)
22302{
22303 return GET_CODE (x) == UNSPEC
22304 && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
22305}
22306
22307/* Compute a (partial) cost for rtx X. Return true if the complete
22308 cost has been computed, and false if subexpressions should be
22309 scanned. In either case, *TOTAL contains the cost result. */
22310
22311static bool
22312ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
22313 int *total, bool speed)
22314{
22315 rtx mask;
22316 enum rtx_code code = GET_CODE (x);
22317 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22318 const struct processor_costs *cost
22319 = speed ? ix86_tune_cost : &ix86_size_cost;
22320 int src_cost;
22321
22322 /* Handling different vternlog variants. */
22323 if ((GET_MODE_SIZE (mode) == 64
22324 ? TARGET_AVX512F
22325 : (TARGET_AVX512VL
22326 || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
22327 && GET_MODE_SIZE (mode) >= 16
22328 && outer_code_i == SET
22329 && ternlog_operand (x, mode))
22330 {
22331 rtx args[3];
22332
22333 args[0] = NULL_RTX;
22334 args[1] = NULL_RTX;
22335 args[2] = NULL_RTX;
22336 int idx = ix86_ternlog_idx (op: x, args);
22337 gcc_assert (idx >= 0);
22338
22339 *total = cost->sse_op;
22340 for (int i = 0; i != 3; i++)
22341 if (args[i])
22342 *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
22343 return true;
22344 }
22345
22346
22347 switch (code)
22348 {
22349 case SET:
22350 if (register_operand (SET_DEST (x), VOIDmode)
22351 && register_operand (SET_SRC (x), VOIDmode))
22352 {
22353 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
22354 return true;
22355 }
22356
22357 if (register_operand (SET_SRC (x), VOIDmode))
22358 /* Avoid potentially incorrect high cost from rtx_costs
22359 for non-tieable SUBREGs. */
22360 src_cost = 0;
22361 else
22362 {
22363 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
22364
22365 if (CONSTANT_P (SET_SRC (x)))
22366 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
22367 a small value, possibly zero for cheap constants. */
22368 src_cost += COSTS_N_INSNS (1);
22369 }
22370
22371 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
22372 return true;
22373
22374 case CONST_INT:
22375 case CONST:
22376 case LABEL_REF:
22377 case SYMBOL_REF:
22378 if (x86_64_immediate_operand (x, VOIDmode))
22379 *total = 0;
22380 else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
22381 /* Consider the zext constants slightly more expensive, as they
22382 can't appear in most instructions. */
22383 *total = 1;
22384 else
22385 /* movabsq is slightly more expensive than a simple instruction. */
22386 *total = COSTS_N_INSNS (1) + 1;
22387 return true;
22388
22389 case CONST_DOUBLE:
22390 if (IS_STACK_MODE (mode))
22391 switch (standard_80387_constant_p (x))
22392 {
22393 case -1:
22394 case 0:
22395 break;
22396 case 1: /* 0.0 */
22397 *total = 1;
22398 return true;
22399 default: /* Other constants */
22400 *total = 2;
22401 return true;
22402 }
22403 /* FALLTHRU */
22404
22405 case CONST_VECTOR:
22406 switch (standard_sse_constant_p (x, pred_mode: mode))
22407 {
22408 case 0:
22409 break;
22410 case 1: /* 0: xor eliminates false dependency */
22411 *total = 0;
22412 return true;
22413 default: /* -1: cmp contains false dependency */
22414 *total = 1;
22415 return true;
22416 }
22417 /* FALLTHRU */
22418
22419 case CONST_WIDE_INT:
22420 /* Fall back to (MEM (SYMBOL_REF)), since that's where
22421 it'll probably end up. Add a penalty for size. */
22422 *total = (COSTS_N_INSNS (1)
22423 + (!TARGET_64BIT && flag_pic)
22424 + (GET_MODE_SIZE (mode) <= 4
22425 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
22426 return true;
22427
22428 case ZERO_EXTEND:
22429 /* The zero extensions is often completely free on x86_64, so make
22430 it as cheap as possible. */
22431 if (TARGET_64BIT && mode == DImode
22432 && GET_MODE (XEXP (x, 0)) == SImode)
22433 *total = 1;
22434 else if (TARGET_ZERO_EXTEND_WITH_AND)
22435 *total = cost->add;
22436 else
22437 *total = cost->movzx;
22438 return false;
22439
22440 case SIGN_EXTEND:
22441 *total = cost->movsx;
22442 return false;
22443
22444 case ASHIFT:
22445 if (SCALAR_INT_MODE_P (mode)
22446 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
22447 && CONST_INT_P (XEXP (x, 1)))
22448 {
22449 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22450 if (value == 1)
22451 {
22452 *total = cost->add;
22453 return false;
22454 }
22455 if ((value == 2 || value == 3)
22456 && cost->lea <= cost->shift_const)
22457 {
22458 *total = cost->lea;
22459 return false;
22460 }
22461 }
22462 /* FALLTHRU */
22463
22464 case ROTATE:
22465 case ASHIFTRT:
22466 case LSHIFTRT:
22467 case ROTATERT:
22468 bool skip_op0, skip_op1;
22469 *total = ix86_shift_rotate_cost (cost, code, mode,
22470 CONSTANT_P (XEXP (x, 1)),
22471 CONST_INT_P (XEXP (x, 1))
22472 ? INTVAL (XEXP (x, 1)) : -1,
22473 GET_CODE (XEXP (x, 1)) == AND,
22474 SUBREG_P (XEXP (x, 1))
22475 && GET_CODE (XEXP (XEXP (x, 1),
22476 0)) == AND,
22477 skip_op0: &skip_op0, skip_op1: &skip_op1);
22478 if (skip_op0 || skip_op1)
22479 {
22480 if (!skip_op0)
22481 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22482 if (!skip_op1)
22483 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
22484 return true;
22485 }
22486 return false;
22487
22488 case FMA:
22489 {
22490 rtx sub;
22491
22492 gcc_assert (FLOAT_MODE_P (mode));
22493 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
22494
22495 *total = ix86_vec_cost (mode,
22496 GET_MODE_INNER (mode) == SFmode
22497 ? cost->fmass : cost->fmasd);
22498 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
22499
22500 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
22501 sub = XEXP (x, 0);
22502 if (GET_CODE (sub) == NEG)
22503 sub = XEXP (sub, 0);
22504 *total += rtx_cost (sub, mode, FMA, 0, speed);
22505
22506 sub = XEXP (x, 2);
22507 if (GET_CODE (sub) == NEG)
22508 sub = XEXP (sub, 0);
22509 *total += rtx_cost (sub, mode, FMA, 2, speed);
22510 return true;
22511 }
22512
22513 case MULT:
22514 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
22515 {
22516 rtx op0 = XEXP (x, 0);
22517 rtx op1 = XEXP (x, 1);
22518 int nbits;
22519 if (CONST_INT_P (XEXP (x, 1)))
22520 {
22521 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22522 for (nbits = 0; value != 0; value &= value - 1)
22523 nbits++;
22524 }
22525 else
22526 /* This is arbitrary. */
22527 nbits = 7;
22528
22529 /* Compute costs correctly for widening multiplication. */
22530 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22531 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22532 == GET_MODE_SIZE (mode))
22533 {
22534 int is_mulwiden = 0;
22535 machine_mode inner_mode = GET_MODE (op0);
22536
22537 if (GET_CODE (op0) == GET_CODE (op1))
22538 is_mulwiden = 1, op1 = XEXP (op1, 0);
22539 else if (CONST_INT_P (op1))
22540 {
22541 if (GET_CODE (op0) == SIGN_EXTEND)
22542 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22543 == INTVAL (op1);
22544 else
22545 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22546 }
22547
22548 if (is_mulwiden)
22549 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22550 }
22551
22552 int mult_init;
22553 // Double word multiplication requires 3 mults and 2 adds.
22554 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22555 {
22556 mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
22557 + 2 * cost->add;
22558 nbits *= 3;
22559 }
22560 else mult_init = cost->mult_init[MODE_INDEX (mode)];
22561
22562 *total = (mult_init
22563 + nbits * cost->mult_bit
22564 + rtx_cost (op0, mode, outer_code, opno, speed)
22565 + rtx_cost (op1, mode, outer_code, opno, speed));
22566
22567 return true;
22568 }
22569 *total = ix86_multiplication_cost (cost, mode);
22570 return false;
22571
22572 case DIV:
22573 case UDIV:
22574 case MOD:
22575 case UMOD:
22576 *total = ix86_division_cost (cost, mode);
22577 return false;
22578
22579 case PLUS:
22580 if (GET_MODE_CLASS (mode) == MODE_INT
22581 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22582 {
22583 if (GET_CODE (XEXP (x, 0)) == PLUS
22584 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22585 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22586 && CONSTANT_P (XEXP (x, 1)))
22587 {
22588 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22589 if (val == 2 || val == 4 || val == 8)
22590 {
22591 *total = cost->lea;
22592 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22593 outer_code, opno, speed);
22594 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
22595 outer_code, opno, speed);
22596 *total += rtx_cost (XEXP (x, 1), mode,
22597 outer_code, opno, speed);
22598 return true;
22599 }
22600 }
22601 else if (GET_CODE (XEXP (x, 0)) == MULT
22602 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22603 {
22604 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22605 if (val == 2 || val == 4 || val == 8)
22606 {
22607 *total = cost->lea;
22608 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22609 outer_code, opno, speed);
22610 *total += rtx_cost (XEXP (x, 1), mode,
22611 outer_code, opno, speed);
22612 return true;
22613 }
22614 }
22615 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22616 {
22617 rtx op = XEXP (XEXP (x, 0), 0);
22618
22619 /* Add with carry, ignore the cost of adding a carry flag. */
22620 if (ix86_carry_flag_operator (op, mode)
22621 || ix86_carry_flag_unset_operator (op, mode))
22622 *total = cost->add;
22623 else
22624 {
22625 *total = cost->lea;
22626 *total += rtx_cost (op, mode,
22627 outer_code, opno, speed);
22628 }
22629
22630 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22631 outer_code, opno, speed);
22632 *total += rtx_cost (XEXP (x, 1), mode,
22633 outer_code, opno, speed);
22634 return true;
22635 }
22636 }
22637 /* FALLTHRU */
22638
22639 case MINUS:
22640 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
22641 if (GET_MODE_CLASS (mode) == MODE_INT
22642 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
22643 && GET_CODE (XEXP (x, 0)) == MINUS
22644 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
22645 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
22646 {
22647 *total = cost->add;
22648 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22649 outer_code, opno, speed);
22650 *total += rtx_cost (XEXP (x, 1), mode,
22651 outer_code, opno, speed);
22652 return true;
22653 }
22654
22655 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22656 *total = cost->addss;
22657 else if (X87_FLOAT_MODE_P (mode))
22658 *total = cost->fadd;
22659 else if (FLOAT_MODE_P (mode))
22660 *total = ix86_vec_cost (mode, cost: cost->addss);
22661 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22662 *total = ix86_vec_cost (mode, cost: cost->sse_op);
22663 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22664 *total = cost->add * 2;
22665 else
22666 *total = cost->add;
22667 return false;
22668
22669 case IOR:
22670 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22671 || SSE_FLOAT_MODE_P (mode))
22672 {
22673 /* (ior (not ...) ...) can be a single insn in AVX512. */
22674 if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
22675 && (GET_MODE_SIZE (mode) == 64
22676 || (TARGET_AVX512VL
22677 && (GET_MODE_SIZE (mode) == 32
22678 || GET_MODE_SIZE (mode) == 16))))
22679 {
22680 rtx right = GET_CODE (XEXP (x, 1)) != NOT
22681 ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
22682
22683 *total = ix86_vec_cost (mode, cost: cost->sse_op)
22684 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22685 outer_code, opno, speed)
22686 + rtx_cost (right, mode, outer_code, opno, speed);
22687 return true;
22688 }
22689 *total = ix86_vec_cost (mode, cost: cost->sse_op);
22690 }
22691 else if (TARGET_64BIT
22692 && mode == TImode
22693 && GET_CODE (XEXP (x, 0)) == ASHIFT
22694 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
22695 && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
22696 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
22697 && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
22698 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
22699 && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
22700 {
22701 /* *concatditi3 is cheap. */
22702 rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
22703 rtx op1 = XEXP (XEXP (x, 1), 0);
22704 *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
22705 ? COSTS_N_INSNS (1) /* movq. */
22706 : set_src_cost (x: op0, DImode, speed_p: speed);
22707 *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
22708 ? COSTS_N_INSNS (1) /* movq. */
22709 : set_src_cost (x: op1, DImode, speed_p: speed);
22710 return true;
22711 }
22712 else if (TARGET_64BIT
22713 && mode == TImode
22714 && GET_CODE (XEXP (x, 0)) == AND
22715 && REG_P (XEXP (XEXP (x, 0), 0))
22716 && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
22717 && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
22718 && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
22719 && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
22720 && GET_CODE (XEXP (x, 1)) == ASHIFT
22721 && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
22722 && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
22723 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
22724 && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
22725 {
22726 /* *insvti_highpart is cheap. */
22727 rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
22728 *total = COSTS_N_INSNS (1) + 1;
22729 *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
22730 ? COSTS_N_INSNS (1) /* movq. */
22731 : set_src_cost (x: op, DImode, speed_p: speed);
22732 return true;
22733 }
22734 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22735 *total = cost->add * 2;
22736 else
22737 *total = cost->add;
22738 return false;
22739
22740 case XOR:
22741 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22742 || SSE_FLOAT_MODE_P (mode))
22743 *total = ix86_vec_cost (mode, cost: cost->sse_op);
22744 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22745 *total = cost->add * 2;
22746 else
22747 *total = cost->add;
22748 return false;
22749
22750 case AND:
22751 if (address_no_seg_operand (x, mode))
22752 {
22753 *total = cost->lea;
22754 return true;
22755 }
22756 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22757 || SSE_FLOAT_MODE_P (mode))
22758 {
22759 /* pandn is a single instruction. */
22760 if (GET_CODE (XEXP (x, 0)) == NOT)
22761 {
22762 rtx right = XEXP (x, 1);
22763
22764 /* (and (not ...) (not ...)) can be a single insn in AVX512. */
22765 if (GET_CODE (right) == NOT && TARGET_AVX512F
22766 && (GET_MODE_SIZE (mode) == 64
22767 || (TARGET_AVX512VL
22768 && (GET_MODE_SIZE (mode) == 32
22769 || GET_MODE_SIZE (mode) == 16))))
22770 right = XEXP (right, 0);
22771
22772 *total = ix86_vec_cost (mode, cost: cost->sse_op)
22773 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22774 outer_code, opno, speed)
22775 + rtx_cost (right, mode, outer_code, opno, speed);
22776 return true;
22777 }
22778 else if (GET_CODE (XEXP (x, 1)) == NOT)
22779 {
22780 *total = ix86_vec_cost (mode, cost: cost->sse_op)
22781 + rtx_cost (XEXP (x, 0), mode,
22782 outer_code, opno, speed)
22783 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22784 outer_code, opno, speed);
22785 return true;
22786 }
22787 *total = ix86_vec_cost (mode, cost: cost->sse_op);
22788 }
22789 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22790 {
22791 if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22792 {
22793 *total = cost->add * 2
22794 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22795 outer_code, opno, speed)
22796 + rtx_cost (XEXP (x, 1), mode,
22797 outer_code, opno, speed);
22798 return true;
22799 }
22800 else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
22801 {
22802 *total = cost->add * 2
22803 + rtx_cost (XEXP (x, 0), mode,
22804 outer_code, opno, speed)
22805 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22806 outer_code, opno, speed);
22807 return true;
22808 }
22809 *total = cost->add * 2;
22810 }
22811 else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22812 {
22813 *total = cost->add
22814 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22815 outer_code, opno, speed)
22816 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
22817 return true;
22818 }
22819 else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
22820 {
22821 *total = cost->add
22822 + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
22823 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22824 outer_code, opno, speed);
22825 return true;
22826 }
22827 else
22828 *total = cost->add;
22829 return false;
22830
22831 case NOT:
22832 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22833 {
22834 /* (not (xor ...)) can be a single insn in AVX512. */
22835 if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
22836 && (GET_MODE_SIZE (mode) == 64
22837 || (TARGET_AVX512VL
22838 && (GET_MODE_SIZE (mode) == 32
22839 || GET_MODE_SIZE (mode) == 16))))
22840 {
22841 *total = ix86_vec_cost (mode, cost: cost->sse_op)
22842 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22843 outer_code, opno, speed)
22844 + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22845 outer_code, opno, speed);
22846 return true;
22847 }
22848
22849 // vnot is pxor -1.
22850 *total = ix86_vec_cost (mode, cost: cost->sse_op) + 1;
22851 }
22852 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22853 *total = cost->add * 2;
22854 else
22855 *total = cost->add;
22856 return false;
22857
22858 case NEG:
22859 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22860 *total = cost->sse_op;
22861 else if (X87_FLOAT_MODE_P (mode))
22862 *total = cost->fchs;
22863 else if (FLOAT_MODE_P (mode))
22864 *total = ix86_vec_cost (mode, cost: cost->sse_op);
22865 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22866 *total = ix86_vec_cost (mode, cost: cost->sse_op);
22867 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22868 *total = cost->add * 3;
22869 else
22870 *total = cost->add;
22871 return false;
22872
22873 case COMPARE:
22874 rtx op0, op1;
22875 op0 = XEXP (x, 0);
22876 op1 = XEXP (x, 1);
22877 if (GET_CODE (op0) == ZERO_EXTRACT
22878 && XEXP (op0, 1) == const1_rtx
22879 && CONST_INT_P (XEXP (op0, 2))
22880 && op1 == const0_rtx)
22881 {
22882 /* This kind of construct is implemented using test[bwl].
22883 Treat it as if we had an AND. */
22884 mode = GET_MODE (XEXP (op0, 0));
22885 *total = (cost->add
22886 + rtx_cost (XEXP (op0, 0), mode, outer_code,
22887 opno, speed)
22888 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
22889 return true;
22890 }
22891
22892 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
22893 {
22894 /* This is an overflow detection, count it as a normal compare. */
22895 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
22896 return true;
22897 }
22898
22899 rtx geu;
22900 /* Match x
22901 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
22902 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
22903 if (mode == CCCmode
22904 && GET_CODE (op0) == NEG
22905 && GET_CODE (geu = XEXP (op0, 0)) == GEU
22906 && REG_P (XEXP (geu, 0))
22907 && (GET_MODE (XEXP (geu, 0)) == CCCmode
22908 || GET_MODE (XEXP (geu, 0)) == CCmode)
22909 && REGNO (XEXP (geu, 0)) == FLAGS_REG
22910 && XEXP (geu, 1) == const0_rtx
22911 && GET_CODE (op1) == LTU
22912 && REG_P (XEXP (op1, 0))
22913 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
22914 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22915 && XEXP (op1, 1) == const0_rtx)
22916 {
22917 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
22918 *total = 0;
22919 return true;
22920 }
22921 /* Match x
22922 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
22923 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
22924 if (mode == CCCmode
22925 && GET_CODE (op0) == NEG
22926 && GET_CODE (XEXP (op0, 0)) == LTU
22927 && REG_P (XEXP (XEXP (op0, 0), 0))
22928 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
22929 && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
22930 && XEXP (XEXP (op0, 0), 1) == const0_rtx
22931 && GET_CODE (op1) == GEU
22932 && REG_P (XEXP (op1, 0))
22933 && GET_MODE (XEXP (op1, 0)) == CCCmode
22934 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22935 && XEXP (op1, 1) == const0_rtx)
22936 {
22937 /* This is *x86_cmc. */
22938 if (!speed)
22939 *total = COSTS_N_BYTES (1);
22940 else if (TARGET_SLOW_STC)
22941 *total = COSTS_N_INSNS (2);
22942 else
22943 *total = COSTS_N_INSNS (1);
22944 return true;
22945 }
22946
22947 if (SCALAR_INT_MODE_P (GET_MODE (op0))
22948 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
22949 {
22950 if (op1 == const0_rtx)
22951 *total = cost->add
22952 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
22953 else
22954 *total = 3*cost->add
22955 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
22956 + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
22957 return true;
22958 }
22959
22960 /* The embedded comparison operand is completely free. */
22961 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
22962 *total = 0;
22963
22964 return false;
22965
22966 case FLOAT_EXTEND:
22967 /* x87 represents all values extended to 80bit. */
22968 if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22969 *total = 0;
22970 else
22971 *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
22972 return false;
22973
22974 case FLOAT_TRUNCATE:
22975 if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22976 *total = cost->fadd;
22977 else
22978 *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
22979 return false;
22980 case FLOAT:
22981 case UNSIGNED_FLOAT:
22982 if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22983 /* TODO: We do not have cost tables for x87. */
22984 *total = cost->fadd;
22985 else if (VECTOR_MODE_P (mode))
22986 *total = ix86_vec_cost (mode, cost: cost->cvtpi2ps);
22987 else
22988 *total = cost->cvtsi2ss;
22989 return false;
22990
22991 case FIX:
22992 case UNSIGNED_FIX:
22993 if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22994 /* TODO: We do not have cost tables for x87. */
22995 *total = cost->fadd;
22996 else if (VECTOR_MODE_P (mode))
22997 *total = ix86_vec_cost (mode, cost: cost->cvtps2pi);
22998 else
22999 *total = cost->cvtss2si;
23000 return false;
23001
23002 case ABS:
23003 /* SSE requires memory load for the constant operand. It may make
23004 sense to account for this. Of course the constant operand may or
23005 may not be reused. */
23006 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23007 *total = cost->sse_op;
23008 else if (X87_FLOAT_MODE_P (mode))
23009 *total = cost->fabs;
23010 else if (FLOAT_MODE_P (mode))
23011 *total = ix86_vec_cost (mode, cost: cost->sse_op);
23012 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23013 *total = cost->sse_op;
23014 return false;
23015
23016 case SQRT:
23017 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
23018 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
23019 else if (X87_FLOAT_MODE_P (mode))
23020 *total = cost->fsqrt;
23021 else if (FLOAT_MODE_P (mode))
23022 *total = ix86_vec_cost (mode,
23023 cost: mode == SFmode ? cost->sqrtss : cost->sqrtsd);
23024 return false;
23025
23026 case UNSPEC:
23027 if (XINT (x, 1) == UNSPEC_TP)
23028 *total = 0;
23029 else if (XINT (x, 1) == UNSPEC_VTERNLOG)
23030 {
23031 *total = cost->sse_op;
23032 *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
23033 *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
23034 *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
23035 return true;
23036 }
23037 else if (XINT (x, 1) == UNSPEC_PTEST)
23038 {
23039 *total = cost->sse_op;
23040 rtx test_op0 = XVECEXP (x, 0, 0);
23041 if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
23042 return false;
23043 if (GET_CODE (test_op0) == AND)
23044 {
23045 rtx and_op0 = XEXP (test_op0, 0);
23046 if (GET_CODE (and_op0) == NOT)
23047 and_op0 = XEXP (and_op0, 0);
23048 *total += rtx_cost (and_op0, GET_MODE (and_op0),
23049 AND, 0, speed)
23050 + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
23051 AND, 1, speed);
23052 }
23053 else
23054 *total = rtx_cost (test_op0, GET_MODE (test_op0),
23055 UNSPEC, 0, speed);
23056 return true;
23057 }
23058 return false;
23059
23060 case VEC_CONCAT:
23061 /* ??? Assume all of these vector manipulation patterns are
23062 recognizable. In which case they all pretty much have the
23063 same cost.
23064 ??? We should still recruse when computing cost. */
23065 *total = cost->sse_op;
23066 return true;
23067
23068 case VEC_SELECT:
23069 /* Special case extracting lower part from the vector.
23070 This by itself needs to code and most of SSE/AVX instructions have
23071 packed and single forms where the single form may be represented
23072 by such VEC_SELECT.
23073
23074 Use cost 1 (despite the fact that functionally equivalent SUBREG has
23075 cost 0). Making VEC_SELECT completely free, for example instructs CSE
23076 to forward propagate VEC_SELECT into
23077
23078 (set (reg eax) (reg src))
23079
23080 which then prevents fwprop and combining. See i.e.
23081 gcc.target/i386/pr91103-1.c.
23082
23083 ??? rtvec_series_p test should be, for valid patterns, equivalent to
23084 vec_series_lowpart_p but is not, since the latter calls
23085 can_cange_mode_class on ALL_REGS and this return false since x87 does
23086 not support subregs at all. */
23087 if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
23088 *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
23089 outer_code, opno, speed) + 1;
23090 else
23091 /* ??? We should still recruse when computing cost. */
23092 *total = cost->sse_op;
23093 return true;
23094
23095 case VEC_DUPLICATE:
23096 *total = rtx_cost (XEXP (x, 0),
23097 GET_MODE (XEXP (x, 0)),
23098 VEC_DUPLICATE, 0, speed);
23099 /* It's broadcast instruction, not embedded broadcasting. */
23100 if (outer_code == SET)
23101 *total += cost->sse_op;
23102
23103 return true;
23104
23105 case VEC_MERGE:
23106 mask = XEXP (x, 2);
23107 /* Scalar versions of SSE instructions may be represented as:
23108
23109 (vec_merge (vec_duplicate (operation ....))
23110 (register or memory)
23111 (const_int 1))
23112
23113 In this case vec_merge and vec_duplicate is for free.
23114 Just recurse into operation and second operand. */
23115 if (mask == const1_rtx
23116 && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
23117 {
23118 *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
23119 outer_code, opno, speed)
23120 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23121 return true;
23122 }
23123 /* This is masked instruction, assume the same cost,
23124 as nonmasked variant. */
23125 else if (TARGET_AVX512F
23126 && (register_operand (mask, GET_MODE (mask))
23127 /* Redunduant clean up of high bits for kmask with VL=2/4
23128 .i.e (vec_merge op0, op1, (and op3 15)). */
23129 || (GET_CODE (mask) == AND
23130 && register_operand (XEXP (mask, 0), GET_MODE (mask))
23131 && CONST_INT_P (XEXP (mask, 1))
23132 && ((INTVAL (XEXP (mask, 1)) == 3
23133 && GET_MODE_NUNITS (mode) == 2)
23134 || (INTVAL (XEXP (mask, 1)) == 15
23135 && GET_MODE_NUNITS (mode) == 4)))))
23136 {
23137 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23138 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23139 return true;
23140 }
23141 /* Combination of the two above:
23142
23143 (vec_merge (vec_merge (vec_duplicate (operation ...))
23144 (register or memory)
23145 (reg:QI mask))
23146 (register or memory)
23147 (const_int 1))
23148
23149 i.e. avx512fp16_vcvtss2sh_mask. */
23150 else if (TARGET_AVX512F
23151 && mask == const1_rtx
23152 && GET_CODE (XEXP (x, 0)) == VEC_MERGE
23153 && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
23154 && register_operand (XEXP (XEXP (x, 0), 2),
23155 GET_MODE (XEXP (XEXP (x, 0), 2))))
23156 {
23157 *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
23158 mode, outer_code, opno, speed)
23159 + rtx_cost (XEXP (XEXP (x, 0), 1),
23160 mode, outer_code, opno, speed)
23161 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
23162 return true;
23163 }
23164 /* vcmp. */
23165 else if (unspec_pcmp_p (x: mask)
23166 || (GET_CODE (mask) == NOT
23167 && unspec_pcmp_p (XEXP (mask, 0))))
23168 {
23169 rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
23170 rtx unsop0 = XVECEXP (uns, 0, 0);
23171 /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
23172 cost the same as register.
23173 This is used by avx_cmp<mode>3_ltint_not. */
23174 if (SUBREG_P (unsop0))
23175 unsop0 = XEXP (unsop0, 0);
23176 if (GET_CODE (unsop0) == NOT)
23177 unsop0 = XEXP (unsop0, 0);
23178 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
23179 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
23180 + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
23181 + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
23182 + cost->sse_op;
23183 return true;
23184 }
23185 else
23186 *total = cost->sse_op;
23187 return false;
23188
23189 case MEM:
23190 /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
23191 or variants in ix86_vector_duplicate_simode_const. */
23192
23193 if (GET_MODE_SIZE (mode) >= 16
23194 && VECTOR_MODE_P (mode)
23195 && SYMBOL_REF_P (XEXP (x, 0))
23196 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
23197 && ix86_broadcast_from_constant (mode, x))
23198 {
23199 *total = COSTS_N_INSNS (2) + speed;
23200 return true;
23201 }
23202
23203 /* An insn that accesses memory is slightly more expensive
23204 than one that does not. */
23205 if (speed)
23206 {
23207 *total += 1;
23208 rtx addr = XEXP (x, 0);
23209 /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
23210 so for MEM (reg) and MEM (reg + 4), the former costs 5,
23211 the latter costs 9, it is not accurate for x86. Ideally
23212 address_cost should be used, but it reduce cost too much.
23213 So current solution is make constant disp as cheap as possible. */
23214 if (GET_CODE (addr) == PLUS
23215 && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
23216 /* Only handle (reg + disp) since other forms of addr are mostly LEA,
23217 there's no additional cost for the plus of disp. */
23218 && register_operand (XEXP (addr, 0), Pmode))
23219 {
23220 *total += 1;
23221 *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
23222 return true;
23223 }
23224 }
23225
23226 return false;
23227
23228 case ZERO_EXTRACT:
23229 if (XEXP (x, 1) == const1_rtx
23230 && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
23231 && GET_MODE (XEXP (x, 2)) == SImode
23232 && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
23233 {
23234 /* Ignore cost of zero extension and masking of last argument. */
23235 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23236 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23237 *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
23238 return true;
23239 }
23240 return false;
23241
23242 case IF_THEN_ELSE:
23243 if (TARGET_XOP
23244 && VECTOR_MODE_P (mode)
23245 && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
23246 {
23247 /* vpcmov. */
23248 *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
23249 if (!REG_P (XEXP (x, 0)))
23250 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23251 if (!REG_P (XEXP (x, 1)))
23252 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23253 if (!REG_P (XEXP (x, 2)))
23254 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23255 return true;
23256 }
23257 else if (TARGET_CMOVE
23258 && SCALAR_INT_MODE_P (mode)
23259 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
23260 {
23261 /* cmov. */
23262 *total = COSTS_N_INSNS (1);
23263 if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
23264 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23265 if (!REG_P (XEXP (x, 1)))
23266 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23267 if (!REG_P (XEXP (x, 2)))
23268 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23269 return true;
23270 }
23271 return false;
23272
23273 default:
23274 return false;
23275 }
23276}
23277
23278#if TARGET_MACHO
23279
23280static int current_machopic_label_num;
23281
/* Given a symbol name and its associated stub, write out the
   definition of the stub: the stub body itself, the stub-binding
   helper the first call jumps through, and the lazy symbol pointer
   slot that dyld patches once the symbol is bound.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  /* Build the binder and symbol names from the stub/symbol names.  */
  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  /* Label for this stub's lazy pointer slot, unique per stub.  */
  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Select the section matching the stub flavor: AT&T-style (stub3),
     pure PIC (stub2), or plain symbol stub.  */
  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      /* AT&T-style ("self-modifying") stub body; these hlt bytes are
	 replaced at bind time.  */
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  /* The binder pushes the address of the lazy pointer and jumps to the
     dyld binding helper, which resolves the symbol and patches the
     pointer.  */
  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  /* Emit the lazy pointer slot, initially pointing at the binder so
     the first call through the stub triggers binding.  */
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
23369#endif /* TARGET_MACHO */
23370
23371/* Order the registers for register allocator. */
23372
23373void
23374x86_order_regs_for_local_alloc (void)
23375{
23376 int pos = 0;
23377 int i;
23378
23379 /* First allocate the local general purpose registers. */
23380 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23381 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (regno: i))
23382 reg_alloc_order [pos++] = i;
23383
23384 /* Global general purpose registers. */
23385 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23386 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (regno: i))
23387 reg_alloc_order [pos++] = i;
23388
23389 /* x87 registers come first in case we are doing FP math
23390 using them. */
23391 if (!TARGET_SSE_MATH)
23392 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23393 reg_alloc_order [pos++] = i;
23394
23395 /* SSE registers. */
23396 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23397 reg_alloc_order [pos++] = i;
23398 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23399 reg_alloc_order [pos++] = i;
23400
23401 /* Extended REX SSE registers. */
23402 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
23403 reg_alloc_order [pos++] = i;
23404
23405 /* Mask register. */
23406 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
23407 reg_alloc_order [pos++] = i;
23408
23409 /* x87 registers. */
23410 if (TARGET_SSE_MATH)
23411 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23412 reg_alloc_order [pos++] = i;
23413
23414 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23415 reg_alloc_order [pos++] = i;
23416
23417 /* Initialize the rest of array as we do not allocate some registers
23418 at all. */
23419 while (pos < FIRST_PSEUDO_REGISTER)
23420 reg_alloc_order [pos++] = 0;
23421}
23422
23423static bool
23424ix86_ms_bitfield_layout_p (const_tree record_type)
23425{
23426 return ((TARGET_MS_BITFIELD_LAYOUT
23427 && !lookup_attribute (attr_name: "gcc_struct", TYPE_ATTRIBUTES (record_type)))
23428 || lookup_attribute (attr_name: "ms_struct", TYPE_ATTRIBUTES (record_type)));
23429}
23430
23431/* Returns an expression indicating where the this parameter is
23432 located on entry to the FUNCTION. */
23433
23434static rtx
23435x86_this_parameter (tree function)
23436{
23437 tree type = TREE_TYPE (function);
23438 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23439 int nregs;
23440
23441 if (TARGET_64BIT)
23442 {
23443 const int *parm_regs;
23444
23445 if (lookup_attribute (attr_name: "preserve_none", TYPE_ATTRIBUTES (type)))
23446 parm_regs = x86_64_preserve_none_int_parameter_registers;
23447 else if (ix86_function_type_abi (fntype: type) == MS_ABI)
23448 parm_regs = x86_64_ms_abi_int_parameter_registers;
23449 else
23450 parm_regs = x86_64_int_parameter_registers;
23451 return gen_rtx_REG (Pmode, parm_regs[aggr]);
23452 }
23453
23454 nregs = ix86_function_regparm (type, decl: function);
23455
23456 if (nregs > 0 && !stdarg_p (type))
23457 {
23458 int regno;
23459 unsigned int ccvt = ix86_get_callcvt (type);
23460
23461 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23462 regno = aggr ? DX_REG : CX_REG;
23463 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23464 {
23465 regno = CX_REG;
23466 if (aggr)
23467 return gen_rtx_MEM (SImode,
23468 plus_constant (Pmode, stack_pointer_rtx, 4));
23469 }
23470 else
23471 {
23472 regno = AX_REG;
23473 if (aggr)
23474 {
23475 regno = DX_REG;
23476 if (nregs == 1)
23477 return gen_rtx_MEM (SImode,
23478 plus_constant (Pmode,
23479 stack_pointer_rtx, 4));
23480 }
23481 }
23482 return gen_rtx_REG (SImode, regno);
23483 }
23484
23485 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
23486 aggr ? 8 : 4));
23487}
23488
23489/* Determine whether x86_output_mi_thunk can succeed. */
23490
23491static bool
23492x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
23493 const_tree function)
23494{
23495 /* 64-bit can handle anything. */
23496 if (TARGET_64BIT)
23497 return true;
23498
23499 /* For 32-bit, everything's fine if we have one free register. */
23500 if (ix86_function_regparm (TREE_TYPE (function), decl: function) < 3)
23501 return true;
23502
23503 /* Need a free register for vcall_offset. */
23504 if (vcall_offset)
23505 return false;
23506
23507 /* Need a free register for GOT references. */
23508 if (flag_pic && !targetm.binds_local_p (function))
23509 return false;
23510
23511 /* Otherwise ok. */
23512 return true;
23513}
23514
23515/* Output the assembler code for a thunk function. THUNK_DECL is the
23516 declaration for the thunk function itself, FUNCTION is the decl for
23517 the target function. DELTA is an immediate constant offset to be
23518 added to THIS. If VCALL_OFFSET is nonzero, the word at
23519 *(*this + vcall_offset) should be added to THIS. */
23520
23521static void
23522x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
23523 HOST_WIDE_INT vcall_offset, tree function)
23524{
23525 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23526 rtx this_param = x86_this_parameter (function);
23527 rtx this_reg, tmp, fnaddr;
23528 unsigned int tmp_regno;
23529 rtx_insn *insn;
23530 int saved_flag_force_indirect_call = flag_force_indirect_call;
23531
23532 if (TARGET_64BIT)
23533 tmp_regno = R10_REG;
23534 else
23535 {
23536 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
23537 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23538 tmp_regno = AX_REG;
23539 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23540 tmp_regno = DX_REG;
23541 else
23542 tmp_regno = CX_REG;
23543
23544 if (flag_pic)
23545 flag_force_indirect_call = 0;
23546 }
23547
23548 emit_note (NOTE_INSN_PROLOGUE_END);
23549
23550 /* CET is enabled, insert EB instruction. */
23551 if ((flag_cf_protection & CF_BRANCH))
23552 emit_insn (gen_nop_endbr ());
23553
23554 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23555 pull it in now and let DELTA benefit. */
23556 if (REG_P (this_param))
23557 this_reg = this_param;
23558 else if (vcall_offset)
23559 {
23560 /* Put the this parameter into %eax. */
23561 this_reg = gen_rtx_REG (Pmode, AX_REG);
23562 emit_move_insn (this_reg, this_param);
23563 }
23564 else
23565 this_reg = NULL_RTX;
23566
23567 /* Adjust the this parameter by a fixed constant. */
23568 if (delta)
23569 {
23570 rtx delta_rtx = GEN_INT (delta);
23571 rtx delta_dst = this_reg ? this_reg : this_param;
23572
23573 if (TARGET_64BIT)
23574 {
23575 if (!x86_64_general_operand (delta_rtx, Pmode))
23576 {
23577 tmp = gen_rtx_REG (Pmode, tmp_regno);
23578 emit_move_insn (tmp, delta_rtx);
23579 delta_rtx = tmp;
23580 }
23581 }
23582
23583 ix86_emit_binop (code: PLUS, Pmode, dst: delta_dst, src: delta_rtx);
23584 }
23585
23586 /* Adjust the this parameter by a value stored in the vtable. */
23587 if (vcall_offset)
23588 {
23589 rtx vcall_addr, vcall_mem, this_mem;
23590
23591 tmp = gen_rtx_REG (Pmode, tmp_regno);
23592
23593 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
23594 if (Pmode != ptr_mode)
23595 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
23596 emit_move_insn (tmp, this_mem);
23597
23598 /* Adjust the this parameter. */
23599 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
23600 if (TARGET_64BIT
23601 && !ix86_legitimate_address_p (ptr_mode, addr: vcall_addr, strict: true))
23602 {
23603 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
23604 emit_move_insn (tmp2, GEN_INT (vcall_offset));
23605 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
23606 }
23607
23608 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
23609 if (Pmode != ptr_mode)
23610 emit_insn (gen_addsi_1_zext (this_reg,
23611 gen_rtx_REG (ptr_mode,
23612 REGNO (this_reg)),
23613 vcall_mem));
23614 else
23615 ix86_emit_binop (code: PLUS, Pmode, dst: this_reg, src: vcall_mem);
23616 }
23617
23618 /* If necessary, drop THIS back to its stack slot. */
23619 if (this_reg && this_reg != this_param)
23620 emit_move_insn (this_param, this_reg);
23621
23622 fnaddr = XEXP (DECL_RTL (function), 0);
23623 if (TARGET_64BIT)
23624 {
23625 if (!flag_pic || targetm.binds_local_p (function)
23626 || TARGET_PECOFF)
23627 ;
23628 else
23629 {
23630 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
23631 tmp = gen_rtx_CONST (Pmode, tmp);
23632 fnaddr = gen_const_mem (Pmode, tmp);
23633 }
23634 }
23635 else
23636 {
23637 if (!flag_pic || targetm.binds_local_p (function))
23638 ;
23639#if TARGET_MACHO
23640 else if (TARGET_MACHO)
23641 {
23642 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
23643 fnaddr = XEXP (fnaddr, 0);
23644 }
23645#endif /* TARGET_MACHO */
23646 else
23647 {
23648 tmp = gen_rtx_REG (Pmode, CX_REG);
23649 output_set_got (dest: tmp, NULL_RTX);
23650
23651 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
23652 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
23653 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
23654 fnaddr = gen_const_mem (Pmode, fnaddr);
23655 }
23656 }
23657
23658 /* Our sibling call patterns do not allow memories, because we have no
23659 predicate that can distinguish between frame and non-frame memory.
23660 For our purposes here, we can get away with (ab)using a jump pattern,
23661 because we're going to do no optimization. */
23662 if (MEM_P (fnaddr))
23663 {
23664 if (sibcall_insn_operand (fnaddr, word_mode))
23665 {
23666 fnaddr = XEXP (DECL_RTL (function), 0);
23667 tmp = gen_rtx_MEM (QImode, fnaddr);
23668 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
23669 tmp = emit_call_insn (tmp);
23670 SIBLING_CALL_P (tmp) = 1;
23671 }
23672 else
23673 emit_jump_insn (gen_indirect_jump (fnaddr));
23674 }
23675 else
23676 {
23677 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
23678 {
23679 // CM_LARGE_PIC always uses pseudo PIC register which is
23680 // uninitialized. Since FUNCTION is local and calling it
23681 // doesn't go through PLT, we use scratch register %r11 as
23682 // PIC register and initialize it here.
23683 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
23684 ix86_init_large_pic_reg (tmp_regno);
23685 fnaddr = legitimize_pic_address (orig: fnaddr,
23686 reg: gen_rtx_REG (Pmode, tmp_regno));
23687 }
23688
23689 if (!sibcall_insn_operand (fnaddr, word_mode))
23690 {
23691 tmp = gen_rtx_REG (word_mode, tmp_regno);
23692 if (GET_MODE (fnaddr) != word_mode)
23693 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
23694 emit_move_insn (tmp, fnaddr);
23695 fnaddr = tmp;
23696 }
23697
23698 tmp = gen_rtx_MEM (QImode, fnaddr);
23699 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
23700 tmp = emit_call_insn (tmp);
23701 SIBLING_CALL_P (tmp) = 1;
23702 }
23703 emit_barrier ();
23704
23705 /* Emit just enough of rest_of_compilation to get the insns emitted. */
23706 insn = get_insns ();
23707 shorten_branches (insn);
23708 assemble_start_function (thunk_fndecl, fnname);
23709 final_start_function (insn, file, 1);
23710 final (insn, file, 1);
23711 final_end_function ();
23712 assemble_end_function (thunk_fndecl, fnname);
23713
23714 flag_force_indirect_call = saved_flag_force_indirect_call;
23715}
23716
23717static void
23718x86_file_start (void)
23719{
23720 default_file_start ();
23721 if (TARGET_16BIT)
23722 fputs (s: "\t.code16gcc\n", stream: asm_out_file);
23723#if TARGET_MACHO
23724 darwin_file_start ();
23725#endif
23726 if (X86_FILE_START_VERSION_DIRECTIVE)
23727 fputs (s: "\t.version\t\"01.01\"\n", stream: asm_out_file);
23728 if (X86_FILE_START_FLTUSED)
23729 fputs (s: "\t.global\t__fltused\n", stream: asm_out_file);
23730 if (ix86_asm_dialect == ASM_INTEL)
23731 fputs (s: "\t.intel_syntax noprefix\n", stream: asm_out_file);
23732}
23733
23734int
23735x86_field_alignment (tree type, int computed)
23736{
23737 machine_mode mode;
23738
23739 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23740 return computed;
23741 if (TARGET_IAMCU)
23742 return iamcu_alignment (type, align: computed);
23743 type = strip_array_types (type);
23744 mode = TYPE_MODE (type);
23745 if (mode == DFmode || mode == DCmode
23746 || GET_MODE_CLASS (mode) == MODE_INT
23747 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23748 {
23749 if (TYPE_ATOMIC (type) && computed > 32)
23750 {
23751 static bool warned;
23752
23753 if (!warned && warn_psabi)
23754 {
23755 const char *url
23756 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
23757
23758 warned = true;
23759 inform (input_location, "the alignment of %<_Atomic %T%> "
23760 "fields changed in %{GCC 11.1%}",
23761 TYPE_MAIN_VARIANT (type), url);
23762 }
23763 }
23764 else
23765 return MIN (32, computed);
23766 }
23767 return computed;
23768}
23769
/* Print a call to profiler function TARGET to FILE, preceded by LABEL
   (either "1:" or empty).  If -mnop-mcount was given, or TARGET is
   literally "nop", emit a nop of the same size instead so the call
   site can later be patched.  */

static void
x86_print_call_or_nop (FILE *file, const char *target,
		       const char *label)
{
  if (flag_nop_mcount || !strcmp (s1: target, s2: "nop"))
    {
      if (TARGET_16BIT)
	/* 3 byte no-op: lea 0(%si), %si */
	fprintf (stream: file, format: "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
      else
	/* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
	fprintf (stream: file, format: "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
		 label);
    }
  else if (!TARGET_PECOFF && flag_pic)
    {
      /* PIC profiling calls must go through the PLT here.  */
      gcc_assert (flag_plt);

      fprintf (stream: file, format: "%s\tcall\t%s@PLT\n", label, target);
    }
  else
    fprintf (stream: file, format: "%s\tcall\t%s\n", label, target);
}
23795
23796static bool
23797current_fentry_name (const char **name)
23798{
23799 tree attr = lookup_attribute (attr_name: "fentry_name",
23800 DECL_ATTRIBUTES (current_function_decl));
23801 if (!attr)
23802 return false;
23803 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
23804 return true;
23805}
23806
23807static bool
23808current_fentry_section (const char **name)
23809{
23810 tree attr = lookup_attribute (attr_name: "fentry_section",
23811 DECL_ATTRIBUTES (current_function_decl));
23812 if (!attr)
23813 return false;
23814 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
23815 return true;
23816}
23817
/* Return a caller-saved register which isn't live or a callee-saved
   register which has been saved on stack in the prologue at entry for
   profile.  R11_OK says whether %r11 may be chosen (it is needed as a
   second scratch in the large PIC sequence).  Falls back to %r10 with
   a sorry () diagnostic when nothing is available.  */

static int
x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
{
  /* Use %r10 if the profiler is emitted before the prologue or it isn't
     used by DRAP. */
  if (ix86_profile_before_prologue ()
      || !crtl->drap_reg
      || REGNO (crtl->drap_reg) != R10_REG)
    return R10_REG;

  /* The profiler is emitted after the prologue.  If there is a
     caller-saved register which isn't live or a callee-saved
     register saved on stack in the prologue, use it.  */

  /* Registers live on entry must not be clobbered by the call
     sequence.  */
  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  int i;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i)
	&& i != R10_REG
#ifdef NO_PROFILE_COUNTERS
	&& (r11_ok || i != R11_REG)
#else
	/* Without NO_PROFILE_COUNTERS, %r11 holds the counter
	   address and is never available.  */
	&& i != R11_REG
#endif
	&& TEST_HARD_REG_BIT (accessible_reg_set, bit: i)
	&& (ix86_save_reg (regno: i, maybe_eh_return: true, ignore_outlined: true)
	    || (call_used_regs[i]
		&& !fixed_regs[i]
		&& !REGNO_REG_SET_P (reg_live, i))))
      return i;

  sorry ("no register available for profiling %<-mcmodel=large%s%>",
	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");

  return R10_REG;
}
23859
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  Handles the queued ENDBR/patchable
   area, picks the mcount function name, emits a call sequence suitable
   for the code model, and optionally records the call site in a
   __mcount_loc-style section.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  /* Emit insns queued to precede everything else at the entry point
     (CET ENDBR marker and/or the patchable area).  */
  if (cfun->machine->insn_queued_at_entrance)
    {
      if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
	fprintf (stream: file, format: "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
      unsigned int patch_area_size
	= crtl->patch_area_size - crtl->patch_area_entry;
      if (patch_area_size)
	ix86_output_patchable_area (patch_area_size,
				    crtl->patch_area_entry == 0);
    }

  const char *mcount_name = MCOUNT_NAME;

  /* When recording call sites, the call is labeled "1:" so the
     section emitted below can reference it.  */
  bool fentry_section_p
    = (flag_record_mcount
       || lookup_attribute (attr_name: "fentry_section",
			    DECL_ATTRIBUTES (current_function_decl)));

  const char *label = fentry_section_p ? "1:" : "";

  /* Name priority: per-function attribute, then -mfentry-name=, then
     the -mfentry default.  */
  if (current_fentry_name (name: &mcount_name))
    ;
  else if (fentry_name)
    mcount_name = fentry_name;
  else if (flag_fentry)
    mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
      else
	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
#endif

      int scratch;
      const char *reg;
      /* Buffer for prefixing a legacy register name with 'r', e.g.
	 "ax" -> "rax".  */
      char legacy_reg[4] = { 0 };

      if (!TARGET_PECOFF)
	{
	  switch (ix86_cmodel)
	    {
	    case CM_LARGE:
	      /* Large model: the target may be out of call range, so
		 load its absolute address into a scratch register.  */
	      scratch = x86_64_select_profile_regnum (r11_ok: true);
	      reg = hi_reg_name[scratch];
	      if (LEGACY_INT_REGNO_P (scratch))
		{
		  legacy_reg[0] = 'r';
		  legacy_reg[1] = reg[0];
		  legacy_reg[2] = reg[1];
		  reg = legacy_reg;
		}
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		fprintf (stream: file, format: "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
			 "\tcall\t%s\n", label, reg, mcount_name,
			 reg);
	      else
		fprintf (stream: file, format: "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
			 label, mcount_name, reg, reg);
	      break;
	    case CM_LARGE_PIC:
#ifdef NO_PROFILE_COUNTERS
	      /* Large PIC model: compute the GOT base and the PLTOFF
		 of mcount by hand; %r11 is used as a second scratch,
		 so the primary scratch must avoid it.  */
	      scratch = x86_64_select_profile_regnum (r11_ok: false);
	      reg = hi_reg_name[scratch];
	      if (LEGACY_INT_REGNO_P (scratch))
		{
		  legacy_reg[0] = 'r';
		  legacy_reg[1] = reg[0];
		  legacy_reg[2] = reg[1];
		  reg = legacy_reg;
		}
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		{
		  fprintf (stream: file, format: "1:movabs\tr11, "
			   "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
		  fprintf (stream: file, format: "\tlea\t%s, 1b[rip]\n", reg);
		  fprintf (stream: file, format: "\tadd\t%s, r11\n", reg);
		  fprintf (stream: file, format: "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
			   mcount_name);
		  fprintf (stream: file, format: "\tadd\t%s, r11\n", reg);
		  fprintf (stream: file, format: "\tcall\t%s\n", reg);
		  break;
		}
	      fprintf (stream: file,
		       format: "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
	      fprintf (stream: file, format: "\tleaq\t1b(%%rip), %%%s\n", reg);
	      fprintf (stream: file, format: "\taddq\t%%r11, %%%s\n", reg);
	      fprintf (stream: file, format: "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
	      fprintf (stream: file, format: "\taddq\t%%r11, %%%s\n", reg);
	      fprintf (stream: file, format: "\tcall\t*%%%s\n", reg);
#else
	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
#endif
	      break;
	    case CM_SMALL_PIC:
	    case CM_MEDIUM_PIC:
	      /* With -fno-plt, call through the GOT entry instead.  */
	      if (!flag_plt)
		{
		  if (ASSEMBLER_DIALECT == ASM_INTEL)
		    fprintf (stream: file, format: "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
			     label, mcount_name);
		  else
		    fprintf (stream: file, format: "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
			     label, mcount_name);
		  break;
		}
	      /* fall through */
	    default:
	      x86_print_call_or_nop (file, target: mcount_name, label);
	      break;
	    }
	}
      else
	x86_print_call_or_nop (file, target: mcount_name, label);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file,
		 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
		 LPREFIX, labelno);
      else
	fprintf (file,
		 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
		 LPREFIX, labelno);
#endif
      if (flag_plt)
	x86_print_call_or_nop (file, target: mcount_name, label);
      else if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (stream: file, format: "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
		 label, mcount_name);
      else
	fprintf (stream: file, format: "%s\tcall\t*%s@GOT(%%ebx)\n",
		 label, mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file,
		 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
		 LPREFIX, labelno);
      else
	fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
		 LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, target: mcount_name, label);
    }

  /* Record the address of the "1:" label emitted above in the
     requested section (default __mcount_loc, as used by ftrace).  */
  if (fentry_section_p)
    {
      const char *sname = "__mcount_loc";

      if (current_fentry_section (name: &sname))
	;
      else if (fentry_section)
	sname = fentry_section;

      fprintf (stream: file, format: "\t.section %s, \"a\",@progbits\n", sname);
      fprintf (stream: file, format: "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (stream: file, format: "\t.previous\n");
    }
}
24031
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  Return a lower bound, in bytes, on the size of INSN.  */

int
ix86_min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  /* Notes, labels and deleted insns occupy no bytes.  */
  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (op: PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  /* Inline asm is estimated conservatively as 0 bytes.  */
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  /* Lengths of these types are not reliable; fall through to
	     the address-length estimate below.  */
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      /* Estimate from the memory-address length; a symbolic reference
	 needs at least a 4-byte displacement.  */
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (op: PATTERN (insn)))
	l = 4;
    }
  /* One byte opcode plus the address bytes, or two bytes minimum.  */
  if (l)
    return 1+l;
  else
    return 2;
}
24090
24091#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
24092
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Insert align insns (gen_max_skip_align) to pad the fourth
   jump out of any such window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (stream: dump_file, format: "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      /* The label realigns the stream; shrink the window from
		 the front until it fits in one 16-byte page again.  */
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (insn: start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (insn: start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (stream: dump_file, format: "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      /* Only jumps and calls count towards the 4-jump limit.  */
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      /* Keep the interval minimal: drop insns from the front until at
	 most 4 jumps remain, tracking whether the dropped insn was a
	 jump (ISJUMP means the interval is bounded by jumps).  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (insn: start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (insn: start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (stream: dump_file, format: "Interval %i to %i has %i bytes\n",
		 INSN_UID (insn: start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  /* Pad so that INSN cannot share a 16-byte page with the
	     three preceding jumps.  */
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (stream: dump_file, format: "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
	}
    }
}
24191#endif
24192
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  (The replacement insn
   gen_simple_return_internal_long carries the needed padding.)  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Visit every block that exits the function.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      /* Find the nearest preceding active insn or label.  */
      for (prev = PREV_INSN (insn: ret); prev; prev = PREV_INSN (insn: prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* RET is a branch target: pad if any predecessor reaches it
	     by a taken (non-fallthru) branch with nonzero frequency.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  /* Pad when RET directly follows a conditional jump or call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
24248
24249/* Count the minimum number of instructions in BB. Return 4 if the
24250 number of instructions >= 4. */
24251
24252static int
24253ix86_count_insn_bb (basic_block bb)
24254{
24255 rtx_insn *insn;
24256 int insn_count = 0;
24257
24258 /* Count number of instructions in this block. Return 4 if the number
24259 of instructions >= 4. */
24260 FOR_BB_INSNS (bb, insn)
24261 {
24262 /* Only happen in exit blocks. */
24263 if (JUMP_P (insn)
24264 && ANY_RETURN_P (PATTERN (insn)))
24265 break;
24266
24267 if (NONDEBUG_INSN_P (insn)
24268 && GET_CODE (PATTERN (insn)) != USE
24269 && GET_CODE (PATTERN (insn)) != CLOBBER)
24270 {
24271 insn_count++;
24272 if (insn_count >= 4)
24273 return insn_count;
24274 }
24275 }
24276
24277 return insn_count;
24278}
24279
24280
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  BB is assumed to
   have an edge to the exit block.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      /* BB itself is reached directly from entry: no predecessor
	 instructions on this path.  */
      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    {
	      int count = ix86_count_insn_bb (bb: e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  /* Add BB's own instructions unless the path is already long enough.  */
  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
24325
/* Pad short function to 4 instructions.  Emits nops before the
   epilogue of any function whose entry-to-exit path has fewer than 4
   instructions (see ix86_count_insn).  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (bb: e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      /* No epilogue note found: insert right before the
		 return itself.  */
	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
24362
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (insn: next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      /* A single nop is enough to stop the unwinder from treating the
	 throwing insn as part of the epilogue.  */
      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  Only the first few insns of the function (bounded by
   x86_stlf_window_ninsns) are considered, since that is where loads
   of just-stored argument slots occur.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn* insn, *start = get_insns ();
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
	 other, just emulate for pipeline) before stalled load, stlf stall
	 case is as fast as no stall cases on CLX.
	 Since CFG is freed before machine_reorg, just do a rough
	 calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
	return;

      /* Stop at any control-flow transfer; the layout-based window
	 estimate is no longer meaningful past it.  */
      if (any_uncondjump_p (insn)
	  || ANY_RETURN_P (PATTERN (insn))
	  || CALL_P (insn))
	return;

      rtx set = single_set (insn);
      if (!set)
	continue;
      rtx src = SET_SRC (set);
      if (!MEM_P (src)
	  /* Only handle V2DFmode load since it doesn't need any scratch
	     register.  */
	  || GET_MODE (src) != E_V2DFmode
	  || !MEM_EXPR (src)
	  || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
	continue;

      /* Replace the 16-byte load by two 8-byte loads: loadlpd of the
	 low half (high half zeroed) emitted before INSN, and INSN
	 itself rewritten in place into loadhpd of the high half.  */
      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs (s: "Due to potential STLF stall, split instruction:\n",
		 stream: dump_file);
	  print_rtl_single (dump_file, insn);
	  fputs (s: "To:\n", stream: dump_file);
	  print_rtl_single (dump_file, loadlpd);
	  print_rtl_single (dump_file, loadhpd);
	}
      /* Rewrite INSN in place and force re-recognition.  */
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
24459
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.
   This is the TARGET_MACHINE_DEPENDENT_REORG hook; it runs after all
   other RTL passes, just before final.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  /* The remaining transformations are pure speed optimizations.  */
  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
	ix86_split_stlf_stall_load ();
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
24486
24487/* Return nonzero when QImode register that must be represented via REX prefix
24488 is used. */
24489bool
24490x86_extended_QIreg_mentioned_p (rtx_insn *insn)
24491{
24492 int i;
24493 extract_insn_cached (insn);
24494 for (i = 0; i < recog_data.n_operands; i++)
24495 if (GENERAL_REG_P (recog_data.operand[i])
24496 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
24497 return true;
24498 return false;
24499}
24500
24501/* Return true when INSN mentions register that must be encoded using REX
24502 prefix. */
24503bool
24504x86_extended_reg_mentioned_p (rtx insn)
24505{
24506 subrtx_iterator::array_type array;
24507 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24508 {
24509 const_rtx x = *iter;
24510 if (REG_P (x)
24511 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
24512 || REX2_INT_REGNO_P (REGNO (x))))
24513 return true;
24514 }
24515 return false;
24516}
24517
24518/* Return true when INSN mentions register that must be encoded using REX2
24519 prefix. */
24520bool
24521x86_extended_rex2reg_mentioned_p (rtx insn)
24522{
24523 subrtx_iterator::array_type array;
24524 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24525 {
24526 const_rtx x = *iter;
24527 if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
24528 return true;
24529 }
24530 return false;
24531}
24532
24533/* Return true when rtx operands mentions register that must be encoded using
24534 evex prefix. */
24535bool
24536x86_evex_reg_mentioned_p (rtx operands[], int nops)
24537{
24538 int i;
24539 for (i = 0; i < nops; i++)
24540 if (EXT_REX_SSE_REG_P (operands[i])
24541 || x86_extended_rex2reg_mentioned_p (insn: operands[i]))
24542 return true;
24543 return false;
24544}
24545
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  Used to
   turn e.g. `addl $-4' into the shorter-encoding `subl $4', and the
   reverse for -128/128.  */
bool
x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case E_DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      /* Therefore the sign-bit check below can be done in SImode.  */
      mode = SImode;
      break;

    case E_SImode:
    case E_HImode:
    case E_QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows: the most negative value has no negation.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
24591
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.
   OPERANDS[0] is the FP destination, OPERANDS[1] the unsigned integer
   source.  Nonnegative inputs use the signed conversion directly; for
   inputs with the sign bit set, convert (in >> 1) | (in & 1) and
   double the result (the OR of the low bit keeps rounding correct).  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* Branch to the halving path if IN, viewed as signed, is negative.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  /* Nonnegative: a plain signed conversion is correct.  */
  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* i0 = (in >> 1) | (in & 1); sticky low bit preserves rounding.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  /* out = f0 + f0, i.e. double the halved conversion.  */
  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
24633
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  Also warns (but still
   allows) implicit __bf16 <-> short conversions, since __bfloat16
   changed meaning in GCC 13.  */

static const char *
ix86_invalid_conversion (const_tree fromtype, const_tree totype)
{
  machine_mode from_mode = element_mode (fromtype);
  machine_mode to_mode = element_mode (totype);

  if (!TARGET_SSE2 && from_mode != to_mode)
    {
      /* Do no allow conversions to/from BFmode/HFmode scalar types
	 when TARGET_SSE2 is not available.  */
      if (from_mode == BFmode)
	return N_("invalid conversion from type %<__bf16%> "
		  "without option %<-msse2%>");
      if (from_mode == HFmode)
	return N_("invalid conversion from type %<_Float16%> "
		  "without option %<-msse2%>");
      if (to_mode == BFmode)
	return N_("invalid conversion to type %<__bf16%> "
		  "without option %<-msse2%>");
      if (to_mode == HFmode)
	return N_("invalid conversion to type %<_Float16%> "
		  "without option %<-msse2%>");
    }

  /* Warn for silent implicit conversion between __bf16 and short,
     since __bfloat16 is refined as real __bf16 instead of short
     since GCC13.  */
  if (element_mode (fromtype) != element_mode (totype)
      && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
    {
      /* Warn for silent implicit conversion where user may expect
	 a bitcast.  */
      if ((TYPE_MODE (fromtype) == BFmode
	   && TYPE_MODE (totype) == HImode)
	  || (TYPE_MODE (totype) == BFmode
	      && TYPE_MODE (fromtype) == HImode))
	warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
		 "to real %<__bf16%> since GCC 13.1, be careful of "
		 "implicit conversion between %<__bf16%> and %<short%>; "
		 "an explicit bitcast may be needed here");
    }

  /* Conversion allowed.  */
  return NULL;
}
24682
24683/* Return the diagnostic message string if the unary operation OP is
24684 not permitted on TYPE, NULL otherwise. */
24685
24686static const char *
24687ix86_invalid_unary_op (int op, const_tree type)
24688{
24689 machine_mode mmode = element_mode (type);
24690 /* Reject all single-operand operations on BFmode/HFmode except for &
24691 when TARGET_SSE2 is not available. */
24692 if (!TARGET_SSE2 && op != ADDR_EXPR)
24693 {
24694 if (mmode == BFmode)
24695 return N_("operation not permitted on type %<__bf16%> "
24696 "without option %<-msse2%>");
24697 if (mmode == HFmode)
24698 return N_("operation not permitted on type %<_Float16%> "
24699 "without option %<-msse2%>");
24700 }
24701
24702 /* Operation allowed. */
24703 return NULL;
24704}
24705
24706/* Return the diagnostic message string if the binary operation OP is
24707 not permitted on TYPE1 and TYPE2, NULL otherwise. */
24708
24709static const char *
24710ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
24711 const_tree type2)
24712{
24713 machine_mode type1_mode = element_mode (type1);
24714 machine_mode type2_mode = element_mode (type2);
24715 /* Reject all 2-operand operations on BFmode or HFmode
24716 when TARGET_SSE2 is not available. */
24717 if (!TARGET_SSE2)
24718 {
24719 if (type1_mode == BFmode || type2_mode == BFmode)
24720 return N_("operation not permitted on type %<__bf16%> "
24721 "without option %<-msse2%>");
24722
24723 if (type1_mode == HFmode || type2_mode == HFmode)
24724 return N_("operation not permitted on type %<_Float16%> "
24725 "without option %<-msse2%>");
24726 }
24727
24728 /* Operation allowed. */
24729 return NULL;
24730}
24731
24732
24733/* Target hook for scalar_mode_supported_p. */
24734static bool
24735ix86_scalar_mode_supported_p (scalar_mode mode)
24736{
24737 if (DECIMAL_FLOAT_MODE_P (mode))
24738 return default_decimal_float_supported_p ();
24739 else if (mode == TFmode)
24740 return true;
24741 else if (mode == HFmode || mode == BFmode)
24742 return true;
24743 else
24744 return default_scalar_mode_supported_p (mode);
24745}
24746
24747/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
24748 if MODE is HFmode, and punt to the generic implementation otherwise. */
24749
24750static bool
24751ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24752{
24753 /* NB: Always return TRUE for HFmode so that the _Float16 type will
24754 be defined by the C front-end for AVX512FP16 intrinsics. We will
24755 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
24756 enabled. */
24757 return ((mode == HFmode || mode == BFmode)
24758 ? true
24759 : default_libgcc_floating_mode_supported_p (mode));
24760}
24761
24762/* Implements target hook vector_mode_supported_p. */
24763static bool
24764ix86_vector_mode_supported_p (machine_mode mode)
24765{
24766 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
24767 either. */
24768 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
24769 return false;
24770 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24771 return true;
24772 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24773 return true;
24774 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
24775 return true;
24776 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
24777 return true;
24778 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
24779 && VALID_MMX_REG_MODE (mode))
24780 return true;
24781 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
24782 && VALID_MMX_REG_MODE_3DNOW (mode))
24783 return true;
24784 if (mode == V2QImode)
24785 return true;
24786 return false;
24787}
24788
24789/* Target hook for c_mode_for_suffix. */
24790static machine_mode
24791ix86_c_mode_for_suffix (char suffix)
24792{
24793 if (suffix == 'q')
24794 return TFmode;
24795 if (suffix == 'w')
24796 return XFmode;
24797
24798 return VOIDmode;
24799}
24800
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   gpr to h prefixed.

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"
*/

static void map_egpr_constraints (vec<const char *> &constraints)
{
  /* Rewrite each constraint string in place (a freshly xstrdup'ed copy
     replaces the original pointer; the old string is not freed here).  */
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      /* Flag-output constraints ("=@cc...") never name a GPR; skip.  */
      if (startswith (str: cur, prefix: "=@cc"))
	continue;

      int len = strlen (s: cur);
      auto_vec<char> buf;

      for (int j = 0; j < len; j++)
	{
	  switch (cur[j])
	    {
	    case 'g':
	      /* "g" has no j-prefixed equivalent; expand to "jrjmi".  */
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: 'r');
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: 'm');
	      buf.safe_push (obj: 'i');
	      break;
	    case 'r':
	    case 'm':
	    case '<':
	    case '>':
	    case 'o':
	    case 'V':
	    case 'p':
	      /* Single-letter GPR-capable constraints get a 'j' prefix.  */
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: cur[j]);
	      break;
	    case 'B':
	      /* "Bm" maps to "ja"; other "B?" pairs are copied verbatim.
		 NOTE(review): reads cur[j + 1] — assumes 'B' is never the
		 final character of a constraint string.  */
	      if (cur[j + 1] == 'm')
		{
		  buf.safe_push (obj: 'j');
		  buf.safe_push (obj: 'a');
		  j++;
		}
	      else
		{
		  buf.safe_push (obj: cur[j]);
		  buf.safe_push (obj: cur[j + 1]);
		  j++;
		}
	      break;
	    case 'T':
	    case 'Y':
	    case 'W':
	    case 'j':
	      /* Two-letter constraint prefixes: copy both characters
		 unchanged.  Same assumption as above on cur[j + 1].  */
	      buf.safe_push (obj: cur[j]);
	      buf.safe_push (obj: cur[j + 1]);
	      j++;
	      break;
	    case '{':
	      /* Copy an explicit register name "{...}" through verbatim,
		 including both braces.  Presumably always terminated by
		 '}' before the NUL — TODO confirm with callers.  */
	      do
		{
		  buf.safe_push (obj: cur[j]);
		} while (cur[j++] != '}');
	      break;
	    default:
	      buf.safe_push (obj: cur[j]);
	      break;
	    }
	}
      buf.safe_push (obj: '\0');
      constraints[i] = xstrdup (buf.address ());
    }
}
24893
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.

   Rewrites "=@cc<cond>" asm outputs into a FLAGS_REG output plus emitted
   insns that materialize the condition into the user's destination.
   Returns the emitted sequence, or NULL if there were no flag outputs
   (in which case the flags register is added to CLOBBERS).  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
		    vec<machine_mode> & /*input_modes*/,
		    vec<const char *> &constraints, vec<rtx> &/*uses*/,
		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
		    location_t loc)
{
  bool saw_asm_flag = false;

  start_sequence ();

  /* With APX extended GPRs but no gpr32 support in inline asm, rewrite
     GPR-capable constraints to their 16-register 'j'-prefixed forms.  */
  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
    map_egpr_constraints (constraints);

  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      /* Only flag outputs ("=@cc...") are handled here.  */
      if (!startswith (str: con, prefix: "=@cc"))
	continue;
      con += 4;
      if (strchr (s: con, c: ',') != NULL)
	{
	  error_at (loc, "alternatives not allowed in %<asm%> flag output");
	  continue;
	}

      /* A leading 'n' negates the condition (e.g. "=@ccnz").  */
      bool invert = false;
      if (con[0] == 'n')
	invert = true, con++;

      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      /* Map the condition suffix to a flags-register mode and an RTL
	 comparison code; code stays UNKNOWN for unrecognized suffixes.  */
      switch (con[0])
	{
	case 'a':
	  if (con[1] == 0)
	    mode = CCAmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCCmode, code = NE;
	  break;
	case 'b':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCAmode, code = NE;
	  break;
	case 'c':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  break;
	case 'e':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	case 'g':
	  if (con[1] == 0)
	    mode = CCGCmode, code = GT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = GE;
	  break;
	case 'l':
	  if (con[1] == 0)
	    mode = CCGCmode, code = LT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = LE;
	  break;
	case 'o':
	  if (con[1] == 0)
	    mode = CCOmode, code = EQ;
	  break;
	case 'p':
	  if (con[1] == 0)
	    mode = CCPmode, code = EQ;
	  break;
	case 's':
	  if (con[1] == 0)
	    mode = CCSmode, code = EQ;
	  break;
	case 'z':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	}
      if (code == UNKNOWN)
	{
	  error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
	  continue;
	}
      if (invert)
	code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=Bf";
	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X";
	  outputs[i] = gen_rtx_SCRATCH (SImode);
	}

      /* Build (code (reg:mode FLAGS_REG) (const_int 0)) in QImode —
	 the setcc-style test of the flags the asm will set.  */
      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error_at (loc, "invalid type for %<asm%> flag output");
	  continue;
	}

      if (dest_mode == QImode)
	emit_insn (gen_rtx_SET (dest, x));
      else
	{
	  /* Wider destination: set a QImode temporary, then zero-extend
	     it into the destination.  */
	  rtx reg = gen_reg_rtx (QImode);
	  emit_insn (gen_rtx_SET (reg, x));

	  reg = convert_to_mode (dest_mode, reg, 1);
	  emit_move_insn (dest, reg);
	}
    }

  rtx_insn *seq = end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (obj: gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (set&: clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
25042
/* Implements target vector targetm.asm.encode_section_info.
   Run the generic hook, then mark symbols that live in the large data
   section so addressing code knows they need far (64-bit) addresses.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Large-model data cannot be reached with 32-bit displacements.  */
  if (ix86_in_large_data_p (exp: decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
25053
25054/* Worker function for REVERSE_CONDITION. */
25055
25056enum rtx_code
25057ix86_reverse_condition (enum rtx_code code, machine_mode mode)
25058{
25059 return (mode == CCFPmode
25060 ? reverse_condition_maybe_unordered (code)
25061 : reverse_condition (code));
25062}
25063
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  Returns the asm template string to emit.  The x87
   stack discipline dictates the choice between fld/fst/fstp forms;
   a REG_DEAD note on the source lets us use the popping variants.  */

const char *
output_387_reg_move (rtx_insn *insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Register-to-register move.  */
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  /* Source dies here: pop it off the x87 stack.  */
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, opno: 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      /* Register-to-memory store.  */
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable();
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.
   Handles Solaris-specific quirks before deferring to the generic
   ELF section emitter.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#if !HAVE_GNU_AS
  /* The native Solaris assembler needs its own COMDAT syntax.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
25139
25140/* Return the mangling of TYPE if it is an extended fundamental type. */
25141
25142static const char *
25143ix86_mangle_type (const_tree type)
25144{
25145 type = TYPE_MAIN_VARIANT (type);
25146
25147 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25148 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25149 return NULL;
25150
25151 if (type == float128_type_node || type == float64x_type_node)
25152 return NULL;
25153
25154 switch (TYPE_MODE (type))
25155 {
25156 case E_BFmode:
25157 return "DF16b";
25158 case E_HFmode:
25159 /* _Float16 is "DF16_".
25160 Align with clang's decision in https://reviews.llvm.org/D33719. */
25161 return "DF16_";
25162 case E_TFmode:
25163 /* __float128 is "g". */
25164 return "g";
25165 case E_XFmode:
25166 /* "long double" or __float80 is "e". */
25167 return "e";
25168 default:
25169 return NULL;
25170 }
25171}
25172
/* Create C++ tinfo symbols for only conditionally available fundamental
   types.  Without SSE2, _Float16 and __bf16 are unavailable, so the
   global type nodes may be unset; temporarily install the target's own
   nodes so CALLBACK can emit tinfos for them, then clear the globals
   again so the types stay unavailable to user code.  */

static void
ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
{
  extern tree ix86_float16_type_node;
  extern tree ix86_bf16_type_node;

  if (!TARGET_SSE2)
    {
      /* Provide the target nodes only where the globals are unset.  */
      if (!float16_type_node)
	float16_type_node = ix86_float16_type_node;
      if (!bfloat16_type_node)
	bfloat16_type_node = ix86_bf16_type_node;
      callback (float16_type_node);
      callback (bfloat16_type_node);
      /* Restore unavailability after emitting the tinfos.  */
      float16_type_node = NULL_TREE;
      bfloat16_type_node = NULL_TREE;
    }
}
25194
/* Cached VAR_DECL for the named TLS stack guard symbol, so repeated
   calls reuse one declaration.  GTY(()) keeps it alive across GC.  */
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

/* Implement TARGET_STACK_PROTECT_GUARD: return a tree expression that
   evaluates to the stack canary.  With -mstack-protector-guard=tls this
   is a load from the FS/GS segment (encoded as an address space), either
   through a named symbol or at a fixed offset; otherwise defer to the
   generic __stack_chk_guard.  */

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      /* The segment register (fs/gs) is represented as a named
	 address space qualifier on the guard's type.  */
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
	{
	  /* -mstack-protector-guard-symbol= : guard is a named TLS
	     variable; build (and cache) its declaration.  */
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  /* Default: guard lives at a fixed offset within the TLS
	     segment; express it as a volatile MEM_REF at that offset.  */
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  return default_stack_protect_guard ();
}
25250
25251static bool
25252ix86_stack_protect_runtime_enabled_p (void)
25253{
25254 /* Naked functions should not enable stack protector. */
25255 return !ix86_function_naked (fn: current_function_decl);
25256}
25257
25258/* For 32-bit code we can save PIC register setup by using
25259 __stack_chk_fail_local hidden function instead of calling
25260 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25261 register, so it is better to call __stack_chk_fail directly. */
25262
25263static tree ATTRIBUTE_UNUSED
25264ix86_stack_protect_fail (void)
25265{
25266 return TARGET_64BIT
25267 ? default_external_stack_protect_fail ()
25268 : default_hidden_stack_protect_fail ();
25269}
25270
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
  if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
    {
      /* Position-independent: use pc-relative encodings, indirect for
	 symbols subject to dynamic relocation.  */
      int type = DW_EH_PE_sdata8;
      if (ptr_mode == SImode
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }

  /* Non-PIC small/medium models fit in 32-bit unsigned data.  */
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;

  return DW_EH_PE_absptr;
}
25300
/* Worker for ix86_builtin_vectorization_cost and the fallback calls
   from ix86_vector_costs::add_stmt_cost.  Returns the cost of one
   statement of kind TYPE_OF_COST operating in MODE, expressed in
   COSTS_N_INSNS units so all entries share a common base.  */
static int
ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
			  machine_mode mode)
{
  bool fp = FLOAT_MODE_P (mode);
  int index;
  switch (type_of_cost)
    {
    case scalar_stmt:
      return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

    case scalar_load:
      /* load/store costs are relative to register move which is 2. Recompute
	 it to COSTS_N_INSNS so everything have same base.  */
      return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
			    : ix86_cost->int_load [2]) / 2;

    case scalar_store:
      return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
			    : ix86_cost->int_store [2]) / 2;

    case vector_stmt:
      return ix86_vec_cost (mode,
			    cost: fp ? ix86_cost->addss : ix86_cost->sse_op);

    case vector_load:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

    case vector_store:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

    case vec_to_scalar:
    case scalar_to_vec:
      return ix86_vec_cost (mode, cost: ix86_cost->sse_op);

      /* We should have separate costs for unaligned loads and gather/scatter.
	 Do that incrementally.  */
    case unaligned_load:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

    case unaligned_store:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

    case vector_gather_load:
      /* Gather cost scales with a fixed setup cost plus a per-element
	 cost times the number of lanes.  */
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS
			      (ix86_cost->gather_static
			       + ix86_cost->gather_per_elt
				 * GET_MODE_NUNITS (mode)) / 2);

    case vector_scatter_store:
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS
			      (ix86_cost->scatter_static
			       + ix86_cost->scatter_per_elt
				 * GET_MODE_NUNITS (mode)) / 2);

    case cond_branch_taken:
      return ix86_cost->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return ix86_cost->cond_not_taken_branch_cost;

    case vec_perm:
      return ix86_vec_cost (mode, cost: ix86_cost->sse_op);

    case vec_promote_demote:
      if (fp)
	return vec_fp_conversion_cost (cost: ix86_tune_cost, size: mode);
      return ix86_vec_cost (mode, cost: ix86_cost->sse_op);

    case vec_construct:
      {
	int n = GET_MODE_NUNITS (mode);
	/* N - 1 element inserts into an SSE vector, the possible
	   GPR -> XMM move is accounted for in add_stmt_cost.  */
	if (GET_MODE_BITSIZE (mode) <= 128)
	  return (n - 1) * ix86_cost->sse_op;
	/* One vinserti128 for combining two SSE vectors for AVX256.  */
	else if (GET_MODE_BITSIZE (mode) == 256)
	  return ((n - 2) * ix86_cost->sse_op
		  + ix86_vec_cost (mode, cost: ix86_cost->sse_op));
	/* One vinserti64x4 and two vinserti128 for combining SSE
	   and AVX256 vectors to AVX512.  */
	else if (GET_MODE_BITSIZE (mode) == 512)
	  {
	    machine_mode half_mode
	      = mode_for_vector (GET_MODE_INNER (mode),
				 GET_MODE_NUNITS (mode) / 2).require ();
	    return ((n - 4) * ix86_cost->sse_op
		    + 2 * ix86_vec_cost (mode: half_mode, cost: ix86_cost->sse_op)
		    + ix86_vec_cost (mode, cost: ix86_cost->sse_op));
	  }
	gcc_unreachable ();
      }

    default:
      gcc_unreachable ();
    }
}
25419
25420/* Implement targetm.vectorize.builtin_vectorization_cost. */
25421static int
25422ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
25423 tree vectype, int)
25424{
25425 machine_mode mode = TImode;
25426 if (vectype != NULL)
25427 mode = TYPE_MODE (vectype);
25428 return ix86_default_vector_cost (type_of_cost, mode);
25429}
25430
25431
25432/* This function returns the calling abi specific va_list type node.
25433 It returns the FNDECL specific va_list type. */
25434
25435static tree
25436ix86_fn_abi_va_list (tree fndecl)
25437{
25438 if (!TARGET_64BIT)
25439 return va_list_type_node;
25440 gcc_assert (fndecl != NULL_TREE);
25441
25442 if (ix86_function_abi (fndecl: (const_tree) fndecl) == MS_ABI)
25443 return ms_va_list_type_node;
25444 else
25445 return sysv_va_list_type_node;
25446}
25447
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it return NULL_TREE.  On 64-bit targets
   the MS and System V va_list types are distinguished by attributes
   placed on their type nodes.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  if (TARGET_64BIT)
    {
      /* The MS va_list is tagged directly on the type.  */
      if (lookup_attribute (attr_name: "ms_abi va_list", TYPE_ATTRIBUTES (type)))
	return ms_va_list_type_node;

      /* The SysV va_list is an array of (or pointer to) a RECORD_TYPE
	 carrying the "sysv_abi va_list" attribute.  */
      if ((TREE_CODE (type) == ARRAY_TYPE
	   && integer_zerop (array_type_nelts_minus_one (type)))
	  || POINTER_TYPE_P (type))
	{
	  tree elem_type = TREE_TYPE (type);
	  if (TREE_CODE (elem_type) == RECORD_TYPE
	      && lookup_attribute (attr_name: "sysv_abi va_list",
				   TYPE_ATTRIBUTES (elem_type)))
	    return sysv_va_list_type_node;
	}

      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
25475
25476/* Iterate through the target-specific builtin types for va_list.
25477 IDX denotes the iterator, *PTREE is set to the result type of
25478 the va_list builtin, and *PNAME to its internal type.
25479 Returns zero if there is no element for this index, otherwise
25480 IDX should be increased upon the next call.
25481 Note, do not iterate a base builtin's name like __builtin_va_list.
25482 Used from c_common_nodes_and_builtins. */
25483
25484static int
25485ix86_enum_va_list (int idx, const char **pname, tree *ptree)
25486{
25487 if (TARGET_64BIT)
25488 {
25489 switch (idx)
25490 {
25491 default:
25492 break;
25493
25494 case 0:
25495 *ptree = ms_va_list_type_node;
25496 *pname = "__builtin_ms_va_list";
25497 return 1;
25498
25499 case 1:
25500 *ptree = sysv_va_list_type_node;
25501 *pname = "__builtin_sysv_va_list";
25502 return 1;
25503 }
25504 }
25505
25506 return 0;
25507}
25508
/* Register the x86 implementations of the scheduler target hooks.  */
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
25522
25523
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.  Returns the number of parallel chains the
   reassociation pass may build.  */

static int
ix86_reassociation_width (unsigned int op, machine_mode mode)
{
  int width = 1;
  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      int div = 1;
      if (INTEGRAL_MODE_P (mode))
	width = ix86_cost->reassoc_vec_int;
      else if (FLOAT_MODE_P (mode))
	width = ix86_cost->reassoc_vec_fp;

      if (width == 1)
	return 1;

      /* Znver1-4 Integer vector instructions execute in FP unit
	 and can execute 3 additions and one multiplication per cycle.  */
      if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
	   || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
	return 1;
      /* Znver5 can do 2 integer multiplications per cycle with latency
	 of 3.  Znver6 is handled the same way.  */
      if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
	width = 6;

      /* Account for targets that splits wide vectors into multiple parts.  */
      if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
	div = GET_MODE_BITSIZE (mode) / 256;
      else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
	div = GET_MODE_BITSIZE (mode) / 128;
      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
	div = GET_MODE_BITSIZE (mode) / 64;
      /* Round up so splitting never drives the width to zero.  */
      width = (width + div - 1) / div;
    }
  /* Scalar part.  */
  else if (INTEGRAL_MODE_P (mode))
    width = ix86_cost->reassoc_int;
  else if (FLOAT_MODE_P (mode))
    width = ix86_cost->reassoc_fp;

  /* Avoid using too many registers in 32bit mode.  */
  if (!TARGET_64BIT && width > 2)
    width = 2;
  return width;
}
25577
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.

   Implement targetm.vectorize.preferred_simd_mode: pick the widest
   enabled vector mode for scalar MODE, honoring -mprefer-vector-width,
   and fall back to word_mode when no suitable vector ISA is enabled.  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V32QImode;
      else
	return V16QImode;

    case E_HImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16HImode;
      else
	return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SImode;
      else
	return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DImode;
      else
	return V2DImode;

    case E_HFmode:
      /* _Float16 vectors need AVX512FP16; the narrower widths
	 additionally need AVX512VL.  */
      if (TARGET_AVX512FP16)
	{
	  if (TARGET_AVX512VL)
	    {
	      if (TARGET_PREFER_AVX128)
		return V8HFmode;
	      else if (TARGET_PREFER_AVX256)
		return V16HFmode;
	    }
	  return V32HFmode;
	}
      return word_mode;

    case E_BFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V32BFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16BFmode;
      else
	return V8BFmode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
25664
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.

   Implement targetm.vectorize.autovectorize_vector_modes.  MODES is
   filled in preference order; ALL requests that less-preferred widths
   also be tried.  Returns VECT_COMPARE_COSTS when the vectorizer
   should cost-compare the candidate modes.  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    {
      /* 512-bit preferred: try widest first.  */
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && all)
    {
      /* 256-bit preferred, but with ALL also try 512-bit last.  */
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      /* 128-bit preferred, but with ALL also try 256-bit last.  */
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_SSE2)
    modes->safe_push (V16QImode);

  /* Sub-128-bit modes as last resorts.  */
  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  if (TARGET_SSE2)
    modes->safe_push (V4QImode);

  return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
}
25705
/* Implementation of targetm.vectorize.get_mask_mode.  Return the mode
   to use for a vector comparison mask on DATA_MODE: a scalar integer
   kmask mode where AVX512-style masking applies, otherwise a vector of
   integer elements matching the data element size.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
      /* AVX512FP16 only supports vector comparison
	 to kmask for _Float16.  */
      || (TARGET_AVX512VL && TARGET_AVX512FP16
	  && GET_MODE_INNER (data_mode) == E_HFmode)
      || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
    {
      /* Byte/word element kmasks additionally require AVX512BW.  */
      if (elem_size == 4
	  || elem_size == 8
	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
	return smallest_int_mode_for_size (size: nunits).require ();
    }

  /* Vector mask case: one integer element per data element.  */
  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (size: elem_size * BITS_PER_UNIT).require ();

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
25737
25738
25739
25740/* Return class of registers which could be used for pseudo of MODE
25741 and of class RCLASS for spilling instead of memory. Return NO_REGS
25742 if it is not possible or non-profitable. */
25743
25744/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
25745
25746static reg_class_t
25747ix86_spill_class (reg_class_t rclass, machine_mode mode)
25748{
25749 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
25750 && TARGET_SSE2
25751 && TARGET_INTER_UNIT_MOVES_TO_VEC
25752 && TARGET_INTER_UNIT_MOVES_FROM_VEC
25753 && (mode == SImode || (TARGET_64BIT && mode == DImode))
25754 && INTEGER_CLASS_P (rclass))
25755 return ALL_SSE_REGS;
25756 return NO_REGS;
25757}
25758
/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
   but returns a lower bound.  Explicit --param settings take precedence;
   otherwise the limit is derived from BRANCH_COST scaled by the tuning's
   misprediction factor.  */

static unsigned int
ix86_max_noce_ifcvt_seq_cost (edge e)
{
  bool predictable_p = predictable_edge_p (e);
  if (predictable_p)
    {
      if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
	return param_max_rtl_if_conversion_predictable_cost;
    }
  else
    {
      if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
	return param_max_rtl_if_conversion_unpredictable_cost;
    }

  /* For modern machines with deeper pipeline, the penalty for branch
     misprediction could be higher than before to reset the pipeline
     slots.  Add parameter br_mispredict_scale as a factor to describe
     the impact of resetting the pipeline.  */

  return BRANCH_COST (true, predictable_p)
	 * ix86_tune_cost->br_mispredict_scale;
}
25785
/* Return true if SEQ is a good candidate as a replacement for the
   if-convertible sequence described in IF_INFO.  Two target-specific
   refinements over the default: a CMOV-count limit on tunings that
   prefer a single conditional move, and extra slack for floating-point
   ifcvt without SSE4.1.  */

static bool
ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    {
      int cmov_cnt = 0;
      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
	 Maybe we should allow even more conditional moves as long as they
	 are used far enough not to stall the CPU, or also consider
	 IF_INFO->TEST_BB succ edge probabilities.  */
      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
	{
	  rtx set = single_set (insn);
	  if (!set)
	    continue;
	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
	    continue;
	  rtx src = SET_SRC (set);
	  machine_mode mode = GET_MODE (src);
	  if (GET_MODE_CLASS (mode) != MODE_INT
	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
	    continue;
	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
	    continue;
	  /* insn is CMOV or FCMOV.  */
	  if (++cmov_cnt > 1)
	    return false;
	}
    }

  /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
     for movdfcc/movsfcc, and could possibly fail cost comparison.
     Increase branch cost will hurt performance for other modes, so
     specially add some preference for floating point ifcvt.  */
  if (!TARGET_SSE4_1 && if_info->x
      && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
      && if_info->speed_p)
    {
      unsigned cost = seq_cost (seq, true);

      if (cost <= if_info->original_cost)
	return true;

      /* Allow up to two extra insns of cost for FP conversions.  */
      return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
    }

  return default_noce_conversion_profitable_p (seq, if_info);
}
25838
/* x86-specific vector costs.  Extends the generic vectorizer cost model
   with register-pressure estimation and unroll-preference tracking.
   The size-3 arrays are indexed by vect_cost_model_location
   (prologue/body/epilogue).  */
class ix86_vector_costs : public vector_costs
{
public:
  ix86_vector_costs (vec_info *, bool);

  /* Record the cost of COUNT copies of a statement of kind KIND.  */
  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
			      stmt_vec_info stmt_info, slp_tree node,
			      tree vectype, int misalign,
			      vect_cost_model_location where) override;
  /* Finalize costs, comparing against the scalar (or other) variant.  */
  void finish_cost (const vector_costs *) override;

private:

  /* Estimate register pressure of the vectorized code.  */
  void ix86_vect_estimate_reg_pressure ();
  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
     estimation of register pressure.
     ??? Currently it's only used by vec_construct/scalar_to_vec
     where we know it's not loaded from memory.  */
  unsigned m_num_gpr_needed[3];
  unsigned m_num_sse_needed[3];
  /* Number of 256-bit vector permutation.  */
  unsigned m_num_avx256_vec_perm[3];
  /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR */
  unsigned m_num_reduc[X86_REDUC_LAST];
  /* Don't do unroll if m_prefer_unroll is false, default is true.  */
  bool m_prefer_unroll;
};
25868
25869ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
25870 : vector_costs (vinfo, costing_for_scalar),
25871 m_num_gpr_needed (),
25872 m_num_sse_needed (),
25873 m_num_avx256_vec_perm (),
25874 m_num_reduc (),
25875 m_prefer_unroll (true)
25876{}
25877
25878/* Implement targetm.vectorize.create_costs. */
25879
25880static vector_costs *
25881ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
25882{
25883 return new ix86_vector_costs (vinfo, costing_for_scalar);
25884}
25885
/* Implement vector_costs::add_stmt_cost.  Compute the cost of COUNT
   copies of a statement of kind KIND (described by STMT_INFO, NODE and
   VECTYPE), add it into the WHERE cost bucket and return the amount
   added.  Also records the statistics (reductions, 256-bit cross-lane
   permutes, register needs) consumed later by finish_cost.  */

unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
				  stmt_vec_info stmt_info, slp_tree node,
				  tree vectype, int,
				  vect_cost_model_location where)
{
  unsigned retval = 0;
  bool scalar_p
    = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
  /* -1 is a sentinel meaning "no specific cost computed"; the generic
     ix86_default_vector_cost is used in that case at the end.  */
  int stmt_cost = - 1;

  bool fp = false;
  machine_mode mode = scalar_p ? SImode : TImode;

  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
      if (scalar_p)
	mode = TYPE_MODE (TREE_TYPE (vectype));
    }
  /* When we are costing a scalar stmt use the scalar stmt to get at the
     type of the operation.  */
  else if (scalar_p && stmt_info)
    if (tree lhs = gimple_get_lhs (stmt_info->stmt))
      {
	fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
	mode = TYPE_MODE (TREE_TYPE (lhs));
      }

  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt && gimple_code (g: stmt_info->stmt) == GIMPLE_ASSIGN)
    {
      tree_code subcode = gimple_assign_rhs_code (gs: stmt_info->stmt);
      /*machine_mode inner_mode = mode;
      if (VECTOR_MODE_P (mode))
	inner_mode = GET_MODE_INNER (mode);*/

      switch (subcode)
	{
	case PLUS_EXPR:
	case POINTER_PLUS_EXPR:
	case MINUS_EXPR:
	  if (kind == scalar_stmt)
	    {
	      if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
		stmt_cost = ix86_cost->addss;
	      else if (X87_FLOAT_MODE_P (mode))
		stmt_cost = ix86_cost->fadd;
	      else
		stmt_cost = ix86_cost->add;
	    }
	  else
	    stmt_cost = ix86_vec_cost (mode, cost: fp ? ix86_cost->addss
				       : ix86_cost->sse_op);
	  break;

	case MULT_EXPR:
	  /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
	     take it as MULT_EXPR.  */
	case MULT_HIGHPART_EXPR:
	  stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode);
	  break;
	  /* There's no direct instruction for WIDEN_MULT_EXPR,
	     take emulation into account.  */
	case WIDEN_MULT_EXPR:
	  stmt_cost = ix86_widen_mult_cost (cost: ix86_cost, mode,
					    TYPE_UNSIGNED (vectype));
	  break;

	case NEGATE_EXPR:
	  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	    stmt_cost = ix86_cost->sse_op;
	  else if (X87_FLOAT_MODE_P (mode))
	    stmt_cost = ix86_cost->fchs;
	  else if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case TRUNC_MOD_EXPR:
	case CEIL_MOD_EXPR:
	case FLOOR_MOD_EXPR:
	case RDIV_EXPR:
	case ROUND_MOD_EXPR:
	case EXACT_DIV_EXPR:
	  stmt_cost = ix86_division_cost (cost: ix86_cost, mode);
	  break;

	case RSHIFT_EXPR:
	case LSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  {
	    tree op1 = gimple_assign_rhs1 (gs: stmt_info->stmt);
	    tree op2 = gimple_assign_rhs2 (gs: stmt_info->stmt);
	    stmt_cost = ix86_shift_rotate_cost
			  (cost: ix86_cost,
			   code: (subcode == RSHIFT_EXPR
				 && !TYPE_UNSIGNED (TREE_TYPE (op1)))
				? ASHIFTRT : LSHIFTRT, mode,
			   TREE_CODE (op2) == INTEGER_CST,
			   op1_val: cst_and_fits_in_hwi (op2)
				    ? int_cst_value (op2) : -1,
			   and_in_op1: false, shift_and_truncate: false, NULL, NULL);
	  }
	  break;
	case NOP_EXPR:
	  /* Only sign-conversions are free.  */
	  if (tree_nop_conversion_p
	      (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
	       TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
	    stmt_cost = 0;
	  else if (fp)
	    stmt_cost = vec_fp_conversion_cost
			  (cost: ix86_tune_cost, GET_MODE_BITSIZE (mode));
	  break;

	case FLOAT_EXPR:
	  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	    stmt_cost = ix86_cost->cvtsi2ss;
	  else if (X87_FLOAT_MODE_P (mode))
	    /* TODO: We do not have cost tables for x87.  */
	    stmt_cost = ix86_cost->fadd;
	  else
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->cvtpi2ps);
	  break;

	case FIX_TRUNC_EXPR:
	  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	    stmt_cost = ix86_cost->cvtss2si;
	  else if (X87_FLOAT_MODE_P (mode))
	    /* TODO: We do not have cost tables for x87.  */
	    stmt_cost = ix86_cost->fadd;
	  else
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->cvtps2pi);
	  break;

	case COND_EXPR:
	  {
	    /* SSE2 conditional move sequence is:
	       pcmpgtd %xmm5, %xmm0 (accounted separately)
	       pand %xmm0, %xmm2
	       pandn %xmm1, %xmm0
	       por %xmm2, %xmm0
	       while SSE4 uses cmp + blend
	       and AVX512 masked moves.

	       The condition is accounted separately since we usually have
		  p = a < b
		  c = p ? x : y
	       and we will account first statement as setcc.  Exception is when
	       p is loaded from memory as bool and then we will not account
	       the compare, but there is no way to check for this.  */

	    int ninsns = TARGET_SSE4_1 ? 1 : 3;

	    /* If one of parameters is 0 or -1 the sequence will be simplified:
	       (if_true & mask) | (if_false & ~mask) -> if_true & mask  */
	    if (ninsns > 1
		&& (zerop (gimple_assign_rhs2 (gs: stmt_info->stmt))
		    || zerop (gimple_assign_rhs3 (gs: stmt_info->stmt))
		    || integer_minus_onep
			 (gimple_assign_rhs2 (gs: stmt_info->stmt))
		    || integer_minus_onep
			 (gimple_assign_rhs3 (gs: stmt_info->stmt))))
	      ninsns = 1;

	    if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	      stmt_cost = ninsns * ix86_cost->sse_op;
	    else if (X87_FLOAT_MODE_P (mode))
	      /* x87 requires conditional branch.  We don't have cost for
		 that.  */
	      ;
	    else if (VECTOR_MODE_P (mode))
	      stmt_cost = ix86_vec_cost (mode, cost: ninsns * ix86_cost->sse_op);
	    else
	      /* compare (accounted separately) + cmov.  */
	      stmt_cost = ix86_cost->add;
	  }
	  break;

	case MIN_EXPR:
	case MAX_EXPR:
	  if (fp)
	    {
	      if (X87_FLOAT_MODE_P (mode)
		  && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
		/* x87 requires conditional branch.  We don't have cost for
		   that.  */
		;
	      else
		/* minss  */
		stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	    }
	  else
	    {
	      if (VECTOR_MODE_P (mode))
		{
		  stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
		  /* vpmin was introduced in SSE3.
		     SSE2 needs pcmpgtd + pand + pandn + pxor.
		     If one of parameters is 0 or -1 the sequence is simplified
		     to pcmpgtd + pand.  */
		  if (!TARGET_SSSE3)
		    {
		      if (zerop (gimple_assign_rhs2 (gs: stmt_info->stmt))
			  || integer_minus_onep
			       (gimple_assign_rhs2 (gs: stmt_info->stmt)))
			stmt_cost *= 2;
		      else
			stmt_cost *= 4;
		    }
		}
	      else
		/* cmp + cmov.  */
		stmt_cost = ix86_cost->add * 2;
	    }
	  break;

	case ABS_EXPR:
	case ABSU_EXPR:
	  if (fp)
	    {
	      if (X87_FLOAT_MODE_P (mode)
		  && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
		/* fabs.  */
		stmt_cost = ix86_cost->fabs;
	      else
		/* andss of sign bit.  */
		stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	    }
	  else
	    {
	      if (VECTOR_MODE_P (mode))
		{
		  stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
		  /* vabs was introduced in SSE3.
		     SSE3 uses psrai + pxor + psub.  */
		  if (!TARGET_SSSE3)
		    stmt_cost *= 3;
		}
	      else
		/* neg + cmov.  */
		stmt_cost = ix86_cost->add * 2;
	    }
	  break;

	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	case BIT_AND_EXPR:
	case BIT_NOT_EXPR:
	  gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
		      && !X87_FLOAT_MODE_P (mode));
	  if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;

	default:
	  if (truth_value_p (code: subcode))
	    {
	      if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
		/* CMPccS? instructions are cheap, so use sse_op.  While they
		   produce a mask which may need to be turned to 0/1 by and,
		   expect that this will be optimized away in a common case.  */
		stmt_cost = ix86_cost->sse_op;
	      else if (X87_FLOAT_MODE_P (mode))
		/* fcmp + setcc.  */
		stmt_cost = ix86_cost->fadd + ix86_cost->add;
	      else if (VECTOR_MODE_P (mode))
		stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	      else
		/* setcc.  */
		stmt_cost = ix86_cost->add;
	      break;
	    }
	  break;
	}
    }

  /* Record number of load/store/gather/scatter in vectorized body.  */
  if (where == vect_body && !m_costing_for_scalar)
    {
      int scale = 1;
      if (vectype
	  && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
	       && TARGET_AVX512_SPLIT_REGS)
	      || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
		  && TARGET_AVX256_SPLIT_REGS)))
	scale = 2;

      switch (kind)
	{
	/* Emulated gather/scatter or any scalarization.  */
	case scalar_load:
	case scalar_stmt:
	case scalar_store:
	case vector_gather_load:
	case vector_scatter_store:
	  m_prefer_unroll = false;
	  break;

	case vector_stmt:
	case vec_to_scalar:
	  /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
	     unroll in the vectorizer will enable partial sum.  */
	  if (stmt_info
	      && vect_is_reduction (stmt_info)
	      && stmt_info->stmt)
	    {
	      /* Handle __builtin_fma.  */
	      if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
		{
		  m_num_reduc[X86_REDUC_FMA] += count * scale;
		  break;
		}

	      if (!is_gimple_assign (gs: stmt_info->stmt))
		break;

	      tree_code subcode = gimple_assign_rhs_code (gs: stmt_info->stmt);
	      machine_mode inner_mode = GET_MODE_INNER (mode);
	      tree rhs1, rhs2;
	      bool native_vnni_p = true;
	      gimple* def;
	      machine_mode mode_rhs;
	      switch (subcode)
		{
		case PLUS_EXPR:
		case MINUS_EXPR:
		  if (!fp || !flag_associative_math
		      || flag_fp_contract_mode != FP_CONTRACT_FAST)
		    break;

		  /* FMA condition for different modes.  */
		  if (((inner_mode == DFmode || inner_mode == SFmode)
		       && !TARGET_FMA && !TARGET_AVX512VL)
		      || (inner_mode == HFmode && !TARGET_AVX512FP16)
		      || (inner_mode == BFmode && !TARGET_AVX10_2))
		    break;

		  /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
		     to FMA/FNMA after vectorization.  */
		  rhs1 = gimple_assign_rhs1 (gs: stmt_info->stmt);
		  rhs2 = gimple_assign_rhs2 (gs: stmt_info->stmt);
		  if (subcode == PLUS_EXPR
		      && TREE_CODE (rhs1) == SSA_NAME
		      && (def = SSA_NAME_DEF_STMT (rhs1), true)
		      && is_gimple_assign (gs: def)
		      && gimple_assign_rhs_code (gs: def) == MULT_EXPR)
		    m_num_reduc[X86_REDUC_FMA] += count * scale;
		  else if (TREE_CODE (rhs2) == SSA_NAME
			   && (def = SSA_NAME_DEF_STMT (rhs2), true)
			   && is_gimple_assign (gs: def)
			   && gimple_assign_rhs_code (gs: def) == MULT_EXPR)
		    m_num_reduc[X86_REDUC_FMA] += count * scale;
		  break;

		  /* Vectorizer lane_reducing_op_p supports DOT_PROD_EXPR,
		     WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
		     SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR.  */
		case DOT_PROD_EXPR:
		  rhs1 = gimple_assign_rhs1 (gs: stmt_info->stmt);
		  mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
		  if (mode_rhs == QImode)
		    {
		      rhs2 = gimple_assign_rhs2 (gs: stmt_info->stmt);
		      signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
		      signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));

		      /* vpdpbusd.  */
		      if (signop1_p != signop2_p)
			native_vnni_p
			  = (GET_MODE_SIZE (mode) == 64
			     ? TARGET_AVX512VNNI
			     : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
				|| TARGET_AVXVNNI));
		      else
			/* vpdpbssd.  */
			native_vnni_p
			  = (GET_MODE_SIZE (mode) == 64
			     ? TARGET_AVX10_2
			     : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
		    }
		  m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;

		  /* Dislike to do unroll and partial sum for
		     emulated DOT_PROD_EXPR.  */
		  if (!native_vnni_p)
		    m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
		  break;

		case SAD_EXPR:
		  m_num_reduc[X86_REDUC_SAD] += count * scale;
		  break;

		default:
		  break;
		}
	    }
	  /* FALLTHRU.  */

	default:
	  break;
	}
    }


  combined_fn cfn;
  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt
      && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
    switch (cfn)
      {
      case CFN_FMA:
	stmt_cost = ix86_vec_cost (mode,
				   cost: mode == SFmode ? ix86_cost->fmass
				   : ix86_cost->fmasd);
	break;
      case CFN_MULH:
	stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode);
	break;
      default:
	break;
      }

  if (kind == vec_promote_demote)
    {
      int outer_size
	= tree_to_uhwi
	    (TYPE_SIZE
	       (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
      int inner_size
	= tree_to_uhwi
	    (TYPE_SIZE
	       (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
      bool inner_fp = FLOAT_TYPE_P
			(TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));

      if (fp && inner_fp)
	stmt_cost = vec_fp_conversion_cost
		      (cost: ix86_tune_cost, GET_MODE_BITSIZE (mode));
      else if (fp && !inner_fp)
	stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->cvtpi2ps);
      else if (!fp && inner_fp)
	stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->cvtps2pi);
      else
	stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
      /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
	 greater than inner size we will end up doing two conversions and
	 packing them.  We always pack pairs; if the size difference is greater
	 it is split into multiple demote operations.  */
      if (inner_size > outer_size)
	stmt_cost = stmt_cost * 2
		    + ix86_vec_cost (mode, cost: ix86_cost->sse_op);
    }

  /* If we do elementwise loads into a vector then we are bound by
     latency and execution resources for the many scalar loads
     (AGU and load ports).  Try to account for this by scaling the
     construction cost by the number of elements involved.  */
  if ((kind == vec_construct || kind == vec_to_scalar)
      && ((node
	   && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
		 || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
		     && SLP_TREE_LANES (node) == 1))
		&& (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
					  (SLP_TREE_REPRESENTATIVE (node))))
		    != INTEGER_CST))
	       || mat_gather_scatter_p (mat: SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
    {
      stmt_cost = ix86_default_vector_cost (type_of_cost: kind, mode);
      stmt_cost *= (TYPE_VECTOR_SUBPARTS (node: vectype) + 1);
    }
  else if ((kind == vec_construct || kind == scalar_to_vec)
	   && node
	   && SLP_TREE_DEF_TYPE (node) == vect_external_def)
    {
      stmt_cost = ix86_default_vector_cost (type_of_cost: kind, mode);
      unsigned i;
      tree op;
      /* TREE_VISITED is used as scratch to de-duplicate operands; it is
	 cleared again below before returning.  */
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	if (TREE_CODE (op) == SSA_NAME)
	  TREE_VISITED (op) = 0;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	{
	  if (TREE_CODE (op) != SSA_NAME
	      || TREE_VISITED (op))
	    continue;
	  TREE_VISITED (op) = 1;
	  gimple *def = SSA_NAME_DEF_STMT (op);
	  tree tem;
	  if (is_gimple_assign (gs: def)
	      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
	      && ((tem = gimple_assign_rhs1 (gs: def)), true)
	      && TREE_CODE (tem) == SSA_NAME
	      /* A sign-change expands to nothing.  */
	      && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
					TREE_TYPE (tem)))
	    def = SSA_NAME_DEF_STMT (tem);
	  /* When the component is loaded from memory we can directly
	     move it to a vector register, otherwise we have to go
	     via a GPR or via vpinsr which involves similar cost.
	     Likewise with a BIT_FIELD_REF extracting from a vector
	     register we can hope to avoid using a GPR.  */
	  if (!is_gimple_assign (gs: def)
	      || ((!gimple_assign_load_p (def)
		   || (!TARGET_SSE4_1
		       && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
		  && (gimple_assign_rhs_code (gs: def) != BIT_FIELD_REF
		      || !VECTOR_TYPE_P (TREE_TYPE
			    (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
	    {
	      if (fp)
		{
		  /* Scalar FP values residing in x87 registers need to be
		     spilled and reloaded.  */
		  auto mode2 = TYPE_MODE (TREE_TYPE (op));
		  if (IS_STACK_MODE (mode2))
		    {
		      int cost
			= (ix86_cost->hard_register.fp_store[mode2 == SFmode
							     ? 0 : 1]
			   + ix86_cost->sse_load[sse_store_index (mode: mode2)]);
		      stmt_cost += COSTS_N_INSNS (cost) / 2;
		    }
		  m_num_sse_needed[where]++;
		}
	      else
		{
		  m_num_gpr_needed[where]++;

		  stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
		}
	    }
	}
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	if (TREE_CODE (op) == SSA_NAME)
	  TREE_VISITED (op) = 0;
    }
  if (stmt_cost == -1)
    stmt_cost = ix86_default_vector_cost (type_of_cost: kind, mode);

  if (kind == vec_perm && vectype
      && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
      /* BIT_FIELD_REF <vect_**, 64, 0> 0 times vec_perm costs 0 in body.  */
      && count != 0)
    {
      bool real_perm = true;
      unsigned nunits = TYPE_VECTOR_SUBPARTS (node: vectype);

      if (node
	  && SLP_TREE_LOAD_PERMUTATION (node).exists ()
	  /* Loop vectorization will have 4 times vec_perm
	     with index as {0, 0, 0, 0}.
	     But it actually generates
	     vec_perm_expr <vect, vect, 0, 0, 0, 0>
	     vec_perm_expr <vect, vect, 1, 1, 1, 1>
	     vec_perm_expr <vect, vect, 2, 2, 2, 2>
	     Need to be handled separately.  */
	  && is_a <bb_vec_info> (p: m_vinfo))
	{
	  unsigned half = nunits / 2;
	  unsigned i = 0;
	  bool allsame = true;
	  unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
	  bool cross_lane_p = false;
	  for (i = 0 ; i != SLP_TREE_LANES (node); i++)
	    {
	      unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
	      /* allsame is just a broadcast.  */
	      if (tmp != first)
		allsame = false;

	      /* 4 times vec_perm with number of lanes multiple of nunits.  */
	      tmp = tmp & (nunits - 1);
	      unsigned index = i & (nunits - 1);
	      if ((index < half && tmp >= half)
		  || (index >= half && tmp < half))
		cross_lane_p = true;

	      if (!allsame && cross_lane_p)
		break;
	    }

	  if (i == SLP_TREE_LANES (node))
	    real_perm = false;
	}

      if (real_perm)
	{
	  m_num_avx256_vec_perm[where] += count;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (stream: dump_file, format: "Detected avx256 cross-lane permutation: ");
	      if (stmt_info)
		print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
	      fprintf (stream: dump_file, format: " \n");
	    }
	}
    }

  /* Penalize DFmode vector operations for Bonnell.  */
  if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
      && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
    stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);

  /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
     for Silvermont as it has out of order integer pipeline and can execute
     2 scalar instruction per tick, but has in order SIMD pipeline.  */
  if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
       || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
      && stmt_info && stmt_info->stmt)
    {
      tree lhs_op = gimple_get_lhs (stmt_info->stmt);
      if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
	retval = (retval * 17) / 10;
    }

  m_costs[where] += retval;

  return retval;
}
26521
26522void
26523ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
26524{
26525 unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
26526 unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
26527
26528 /* Any better way to have target available fp registers, currently use SSE_REGS. */
26529 unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
26530 for (unsigned i = 0; i != 3; i++)
26531 {
26532 if (m_num_gpr_needed[i] > target_avail_regs)
26533 m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
26534 /* Only measure sse registers pressure. */
26535 if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
26536 m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
26537 }
26538}
26539
/* Implement vector_costs::finish_cost.  Apply x86-specific final
   adjustments: reject over-wide predicated loops, suggest an unroll
   factor for reduction loops, add register-pressure penalties, veto
   256-bit cross-lane permutes where tuned to, and pick epilogue
   vector modes (including masked epilogues).  */

void
ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
{
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: m_vinfo);
  if (loop_vinfo && !m_costing_for_scalar)
    {
      /* We are currently not asking the vectorizer to compare costs
	 between different vector mode sizes.  When using predication
	 that will end up always choosing the preferred mode size even
	 if there's a smaller mode covering all lanes.  Test for this
	 situation and artificially reject the larger mode attempt.
	 ??? We currently lack masked ops for sub-SSE sized modes,
	 so we could restrict this rejection to AVX and AVX512 modes
	 but error on the safe side for now.  */
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
	  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
	  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
	      > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
	m_costs[vect_body] = INT_MAX;

      /* We'd like to avoid using masking if there's an in-order reduction
	 to vectorize because that will also perform in-order adds of
	 masked elements (as neutral value, of course) here, but there
	 is currently no way to indicate to try un-masked with the same
	 mode.  */

      bool any_reduc_p = false;
      for (int i = 0; i != X86_REDUC_LAST; i++)
	if (m_num_reduc[i])
	  {
	    any_reduc_p = true;
	    break;
	  }

      if (any_reduc_p
	  /* Not much gain for loop with gather and scatter.  */
	  && m_prefer_unroll
	  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
	{
	  unsigned unroll_factor
	    = OPTION_SET_P (ix86_vect_unroll_limit)
	      ? ix86_vect_unroll_limit
	      : ix86_cost->vect_unroll_limit;

	  if (unroll_factor > 1)
	    {
	      /* Clamp the unroll factor so each reduction kind's latency
		 threshold is covered by the number of reductions seen.  */
	      for (int i = 0 ; i != X86_REDUC_LAST; i++)
		{
		  if (m_num_reduc[i])
		    {
		      unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
					   m_num_reduc[i]);
		      unroll_factor = MIN (unroll_factor, tmp);
		    }
		}

	      m_suggested_unroll_factor = 1 << ceil_log2 (x: unroll_factor);
	    }
	}

    }

  ix86_vect_estimate_reg_pressure ();

  for (int i = 0; i != 3; i++)
    if (m_num_avx256_vec_perm[i]
	&& TARGET_AVX256_AVOID_VEC_PERM)
      m_costs[i] = INT_MAX;

  /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
     a AVX2 and a SSE epilogue for AVX512 vectorized loops.  */
  if (loop_vinfo
      && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
      && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
    m_suggested_epilogue_mode = V16QImode;
  /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
     enable a 64bit SSE epilogue.  */
  if (loop_vinfo
      && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
    m_suggested_epilogue_mode = V8QImode;

  /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
     a masked epilogue if that doesn't seem detrimental.  */
  if (loop_vinfo
      && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
      /* Avoid a masked epilog if cascaded epilogues eventually get us
	 to one with VF 1 as that means no scalar epilog at all.  */
      && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
	    / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
	   && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
      && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
      && !OPTION_SET_P (param_vect_partial_vector_usage))
    {
      bool avoid = false;
      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
	{
	  unsigned int peel_niter
	    = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
	  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	    peel_niter += 1;
	  /* When we know the number of scalar iterations of the epilogue,
	     avoid masking when a single vector epilog iteration handles
	     it in full.  */
	  if (pow2p_hwi (x: (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
			 % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
	    avoid = true;
	}
      if (!avoid && loop_outer (loop: loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
	for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
	  {
	    if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	      ;
	    else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	      ;
	    else
	      {
		int loop_depth
		  = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
					DDR_LOOP_NEST (ddr));
		if (DDR_NUM_DIST_VECTS (ddr) == 1
		    && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
		  {
		    /* Avoid the case when there's an outer loop that might
		       traverse a multi-dimensional array with the inner
		       loop just executing the masked epilogue with a
		       read-write where the next outer iteration might
		       read from the masked part of the previous write,
		       'n' filling half a vector.
			 for (j = 0; j < m; ++j)
			   for (i = 0; i < n; ++i)
			     a[j][i] = c * a[j][i];  */
		    avoid = true;
		    break;
		  }
	      }
	  }
      /* Avoid using masking if there's an in-order reduction
	 to vectorize because that will also perform in-order adds of
	 masked elements (as neutral value, of course).  */
      if (!avoid)
	{
	  for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
	    if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
		&& (vect_reduc_type (vinfo: loop_vinfo, SLP_INSTANCE_TREE (inst))
		    == FOLD_LEFT_REDUCTION))
	      {
		avoid = true;
		break;
	      }
	}
      if (!avoid)
	{
	  m_suggested_epilogue_mode = loop_vinfo->vector_mode;
	  m_masked_epilogue = 1;
	}
    }

  vector_costs::finish_cost (scalar_costs);
}
26705
26706/* Validate target specific memory model bits in VAL. */
26707
26708static unsigned HOST_WIDE_INT
26709ix86_memmodel_check (unsigned HOST_WIDE_INT val)
26710{
26711 enum memmodel model = memmodel_from_int (val);
26712 bool strong;
26713
26714 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
26715 |MEMMODEL_MASK)
26716 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
26717 {
26718 warning (OPT_Winvalid_memory_model,
26719 "unknown architecture specific memory model");
26720 return MEMMODEL_SEQ_CST;
26721 }
26722 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
26723 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
26724 {
26725 warning (OPT_Winvalid_memory_model,
26726 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
26727 "memory model");
26728 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
26729 }
26730 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
26731 {
26732 warning (OPT_Winvalid_memory_model,
26733 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
26734 "memory model");
26735 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
26736 }
26737 return val;
26738}
26739
26740/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
26741 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
26742 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
26743 or number of vecsize_mangle variants that should be emitted. */
26744
26745static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
					     struct cgraph_simd_clone *clonei,
					     tree base_type, int num,
					     bool explicit_p)
{
  /* Number of clone variants this hook asks the caller to create:
     1 when a single ISA is picked, 4 for the full 'b'..'e' set.  */
  int ret = 1;

  /* An explicitly requested simdlen must be a power of two in [2, 1024].  */
  if (clonei->simdlen
      && (clonei->simdlen < 2
	  || clonei->simdlen > 1024
	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      if (explicit_p)
	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		    "unsupported simdlen %wd", clonei->simdlen.to_constant ());
      return 0;
    }

  /* Only scalar integer/float return types (passed in a single register)
     are supported; aggregates with those modes are rejected too.  */
  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case E_QImode:
      case E_HImode:
      case E_SImode:
      case E_DImode:
      case E_SFmode:
      case E_DFmode:
      /* case E_SCmode: */
      /* case E_DCmode: */
	if (!AGGREGATE_TYPE_P (ret_type))
	  break;
	/* FALLTHRU */
      default:
	if (explicit_p)
	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		      "unsupported return type %qT for simd", ret_type);
	return 0;
      }

  tree t;
  int i;
  /* Walk either DECL_ARGUMENTS (for definitions or unprototyped decls)
     or the prototype's TYPE_ARG_TYPES, and apply the same scalar-mode
     restriction to each argument.  Uniform arguments are exempt since
     they are passed as plain scalars to every clone.  */
  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);

  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
       t && t != void_list_node; t = TREE_CHAIN (t), i++)
    {
      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
      switch (TYPE_MODE (arg_type))
	{
	case E_QImode:
	case E_HImode:
	case E_SImode:
	case E_DImode:
	case E_SFmode:
	case E_DFmode:
	/* case E_SCmode: */
	/* case E_DCmode: */
	  if (!AGGREGATE_TYPE_P (arg_type))
	    break;
	  /* FALLTHRU */
	default:
	  if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
	    break;
	  if (explicit_p)
	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
			"unsupported argument type %qT for simd", arg_type);
	  return 0;
	}
    }

  if (!TREE_PUBLIC (node->decl) || !explicit_p)
    {
      /* If the function isn't exported, we can pick up just one ISA
	 for the clones.  */
      if (TARGET_AVX512F)
	clonei->vecsize_mangle = 'e';
      else if (TARGET_AVX2)
	clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
	clonei->vecsize_mangle = 'c';
      else
	clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      /* Exported: emit all four ABI variants; NUM selects which one this
	 call is computing.  */
      clonei->vecsize_mangle = "bcde"[num];
      ret = 4;
    }
  clonei->mask_mode = VOIDmode;
  /* Vector sizes per mangling letter: 'b' = SSE2, 'c' = AVX (256-bit FP
     only), 'd' = AVX2, 'e' = AVX-512 (which also uses mask registers).  */
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    case 'e':
      clonei->vecsize_int = 512;
      clonei->vecsize_float = 512;
      if (TYPE_MODE (base_type) == QImode)
	clonei->mask_mode = DImode;
      else
	clonei->mask_mode = SImode;
      break;
    }
  if (clonei->simdlen == 0)
    {
      /* No simdlen given: derive it from the vector size and the width
	 of the characteristic type.  */
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
	clonei->simdlen = clonei->vecsize_int;
      else
	clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen = clonei->simdlen
			/ GET_MODE_BITSIZE (TYPE_MODE (base_type));
    }
  else if (clonei->simdlen > 16)
    {
      /* For compatibility with ICC, use the same upper bounds
	 for simdlen.  In particular, for CTYPE below, use the return type,
	 unless the function returns void, in that case use the characteristic
	 type.  If it is possible for given SIMDLEN to pass CTYPE value
	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
	 emit corresponding clone.  */
      tree ctype = ret_type;
      if (VOID_TYPE_P (ret_type))
	ctype = base_type;
      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
	cnt /= clonei->vecsize_int;
      else
	cnt /= clonei->vecsize_float;
      if (cnt > (TARGET_64BIT ? 16 : 8))
	{
	  if (explicit_p)
	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
			"unsupported simdlen %wd",
			clonei->simdlen.to_constant ());
	  return 0;
	}
    }
  return ret;
}
26898
26899/* If SIMD clone NODE can't be used in a vectorized loop
26900 in current function, return -1, otherwise return a badness of using it
26901 (0 if it is most desirable from vecsize_mangle point of view, 1
26902 slightly less desirable, etc.). */
26903
26904static int
26905ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
26906{
26907 switch (node->simdclone->vecsize_mangle)
26908 {
26909 case 'b':
26910 if (!TARGET_SSE2)
26911 return -1;
26912 if (!TARGET_AVX)
26913 return 0;
26914 return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
26915 case 'c':
26916 if (!TARGET_AVX)
26917 return -1;
26918 return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
26919 case 'd':
26920 if (!TARGET_AVX2)
26921 return -1;
26922 return TARGET_AVX512F ? 1 : 0;
26923 case 'e':
26924 if (!TARGET_AVX512F)
26925 return -1;
26926 return 0;
26927 default:
26928 gcc_unreachable ();
26929 }
26930}
26931
26932/* This function adjusts the unroll factor based on
26933 the hardware capabilities. For ex, bdver3 has
26934 a loop buffer which makes unrolling of smaller
26935 loops less important. This function decides the
26936 unroll factor using number of memory references
26937 (value 32 is used) as a heuristic. */
26938
26939static unsigned
26940ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
26941{
26942 basic_block *bbs;
26943 rtx_insn *insn;
26944 unsigned i;
26945 unsigned mem_count = 0;
26946
26947 /* Unroll small size loop when unroll factor is not explicitly
26948 specified. */
26949 if (ix86_unroll_only_small_loops && !loop->unroll)
26950 {
26951 if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
26952 return MIN (nunroll, ix86_cost->small_unroll_factor);
26953 else
26954 return 1;
26955 }
26956
26957 if (!TARGET_ADJUST_UNROLL)
26958 return nunroll;
26959
26960 /* Count the number of memory references within the loop body.
26961 This value determines the unrolling factor for bdver3 and bdver4
26962 architectures. */
26963 subrtx_iterator::array_type array;
26964 bbs = get_loop_body (loop);
26965 for (i = 0; i < loop->num_nodes; i++)
26966 FOR_BB_INSNS (bbs[i], insn)
26967 if (NONDEBUG_INSN_P (insn))
26968 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
26969 if (const_rtx x = *iter)
26970 if (MEM_P (x))
26971 {
26972 machine_mode mode = GET_MODE (x);
26973 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
26974 if (n_words > 4)
26975 mem_count += 2;
26976 else
26977 mem_count += 1;
26978 }
26979 free (ptr: bbs);
26980
26981 if (mem_count && mem_count <=32)
26982 return MIN (nunroll, 32 / mem_count);
26983
26984 return nunroll;
26985}
26986
26987
26988/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
26989
26990static bool
26991ix86_float_exceptions_rounding_supported_p (void)
26992{
26993 /* For x87 floating point with standard excess precision handling,
26994 there is no adddf3 pattern (since x87 floating point only has
26995 XFmode operations) so the default hook implementation gets this
26996 wrong. */
26997 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
26998}
26999
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.
   Build the three tree fragments the middle end splices around an atomic
   compound assignment: *HOLD saves the FP environment and masks traps,
   *CLEAR discards exceptions raised by the speculative pass, and *UPDATE
   restores the environment and re-raises the recorded exceptions.  Both
   the x87 and the SSE (MXCSR) environments are handled when enabled.  */

static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* No FP unit configured - nothing to protect.  */
  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
    return;
  /* Accumulates the exception flags observed by *UPDATE.  */
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      /* The x87 environment image is 7 32-bit words (fnstenv layout).  */
      tree fenv_index_type = build_index_type (size_int (6));
      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = get_ix86_builtin (c: IX86_BUILTIN_FNSTENV);
      tree fldenv = get_ix86_builtin (c: IX86_BUILTIN_FLDENV);
      tree fnstsw = get_ix86_builtin (c: IX86_BUILTIN_FNSTSW);
      tree fnclex = get_ix86_builtin (c: IX86_BUILTIN_FNCLEX);
      /* *HOLD: fnstenv (which also masks all exceptions) then fnclex.  */
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
			 NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
		      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      /* *UPDATE: read the status word via fnstsw into EXCEPTIONS_VAR,
	 then reload the saved environment with fldenv.  */
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
			    fnstsw_call, NULL_TREE, NULL_TREE);
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
				exceptions_var, exceptions_x87,
				NULL_TREE, NULL_TREE);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
			sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
    }
  if (TARGET_SSE && TARGET_SSE_MATH)
    {
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = get_ix86_builtin (c: IX86_BUILTIN_STMXCSR);
      tree ldmxcsr = get_ix86_builtin (c: IX86_BUILTIN_LDMXCSR);
      /* *HOLD: save MXCSR, set all exception mask bits (0x1f80) and
	 clear the sticky exception flags (low 6 bits), then reload.  */
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
				      mxcsr_orig_var, stmxcsr_hold_call,
				      NULL_TREE, NULL_TREE);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
				  mxcsr_orig_var,
				  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
			     build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
				     mxcsr_mod_var, hold_mod_val,
				     NULL_TREE, NULL_TREE);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
			      hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
			 ldmxcsr_hold_call);
      /* Append to whatever the x87 part already produced.  */
      if (*hold)
	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
	*hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
			 ldmxcsr_clear_call);
      else
	*clear = ldmxcsr_clear_call;
      /* *UPDATE: OR the current MXCSR flags into EXCEPTIONS_VAR, then
	 restore the original MXCSR.  */
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
					  stxmcsr_update_call);
      if (*update)
	{
	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
					exceptions_var, exceptions_sse);
	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
					   exceptions_var, exceptions_mod);
	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
			    exceptions_assign);
	}
      else
	*update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
			  exceptions_sse, NULL_TREE, NULL_TREE);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
			ldmxcsr_update_call);
    }
  /* Finally re-raise the accumulated exceptions atomically.  */
  tree atomic_feraiseexcept
    = builtin_decl_implicit (fncode: BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
						    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
		    atomic_feraiseexcept_call);
}
27100
#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, common symbol is local only for non-PIE binaries.  For
   x86-64, common symbol is local only for non-PIE binaries or linker
   supports copy reloc in PIE binaries.  */

static bool
ix86_binds_local_p (const_tree exp)
{
  /* The "nodirect_extern_access" attribute overrides the global
     -mdirect-extern-access setting for this decl.  */
  bool direct_extern_access
    = (ix86_direct_extern_access
       && !(VAR_OR_FUNCTION_DECL_P (exp)
	    && lookup_attribute (attr_name: "nodirect_extern_access",
				 DECL_ATTRIBUTES (exp))));
  /* Record that at least one symbol needs indirect access; used
     elsewhere when deciding on GNU property notes.  */
  if (!direct_extern_access)
    ix86_has_no_direct_extern_access = true;
  return default_binds_local_p_3 (exp, flag_shlib != 0, true,
				  direct_extern_access,
				  (direct_extern_access
				   && (!flag_pic
				       || (TARGET_64BIT
					   && HAVE_LD_PIE_COPYRELOC != 0))));
}
27123
27124/* If flag_pic or ix86_direct_extern_access is false, then neither
27125 local nor global relocs should be placed in readonly memory. */
27126
27127static int
27128ix86_reloc_rw_mask (void)
27129{
27130 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
27131}
27132#endif
27133
27134/* Return true iff ADDR can be used as a symbolic base address. */
27135
27136static bool
27137symbolic_base_address_p (rtx addr)
27138{
27139 if (SYMBOL_REF_P (addr))
27140 return true;
27141
27142 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
27143 return true;
27144
27145 return false;
27146}
27147
27148/* Return true iff ADDR can be used as a base address. */
27149
27150static bool
27151base_address_p (rtx addr)
27152{
27153 if (REG_P (addr))
27154 return true;
27155
27156 if (symbolic_base_address_p (addr))
27157 return true;
27158
27159 return false;
27160}
27161
/* If MEM is in the form of [(base+symbase)+offset], extract the three
   parts of address and set to BASE, SYMBASE and OFFSET, otherwise
   return false.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip a wrapping CONST so the patterns below see the raw PLUS.  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Form 1: bare base (register or symbol), no symbase/offset.  */
  if (base_address_p (addr))
    {
      *base = addr;
      *symbase = const0_rtx;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && base_address_p (XEXP (addr, 0)))
    {
      rtx addend = XEXP (addr, 1);

      if (GET_CODE (addend) == CONST)
	addend = XEXP (addend, 0);

      /* Form 2: base + constant offset.  */
      if (CONST_INT_P (addend))
	{
	  *base = XEXP (addr, 0);
	  *symbase = const0_rtx;
	  *offset = addend;
	  return true;
	}

      /* Also accept REG + symbolic ref, with or without a CONST_INT
	 offset.  */
      if (REG_P (XEXP (addr, 0)))
	{
	  /* Form 3: reg + symbol.  */
	  if (symbolic_base_address_p (addr: addend))
	    {
	      *base = XEXP (addr, 0);
	      *symbase = addend;
	      *offset = const0_rtx;
	      return true;
	    }

	  /* Form 4: reg + (symbol + constant).  */
	  if (GET_CODE (addend) == PLUS
	      && symbolic_base_address_p (XEXP (addend, 0))
	      && CONST_INT_P (XEXP (addend, 1)))
	    {
	      *base = XEXP (addr, 0);
	      *symbase = XEXP (addend, 0);
	      *offset = XEXP (addend, 1);
	      return true;
	    }
	}
    }

  return false;
}
27228
27229/* Given OPERANDS of consecutive load/store, check if we can merge
27230 them into move multiple. LOAD is true if they are load instructions.
27231 MODE is the mode of memory operands. */
27232
27233bool
27234ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
27235 machine_mode mode)
27236{
27237 HOST_WIDE_INT offval_1, offval_2, msize;
27238 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
27239 symbase_1, symbase_2, offset_1, offset_2;
27240
27241 if (load)
27242 {
27243 mem_1 = operands[1];
27244 mem_2 = operands[3];
27245 reg_1 = operands[0];
27246 reg_2 = operands[2];
27247 }
27248 else
27249 {
27250 mem_1 = operands[0];
27251 mem_2 = operands[2];
27252 reg_1 = operands[1];
27253 reg_2 = operands[3];
27254 }
27255
27256 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
27257
27258 if (REGNO (reg_1) != REGNO (reg_2))
27259 return false;
27260
27261 /* Check if the addresses are in the form of [base+offset]. */
27262 if (!extract_base_offset_in_addr (mem: mem_1, base: &base_1, symbase: &symbase_1, offset: &offset_1))
27263 return false;
27264 if (!extract_base_offset_in_addr (mem: mem_2, base: &base_2, symbase: &symbase_2, offset: &offset_2))
27265 return false;
27266
27267 /* Check if the bases are the same. */
27268 if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
27269 return false;
27270
27271 offval_1 = INTVAL (offset_1);
27272 offval_2 = INTVAL (offset_2);
27273 msize = GET_MODE_SIZE (mode);
27274 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
27275 if (offval_1 + msize != offval_2)
27276 return false;
27277
27278 return true;
27279}
27280
27281/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
27282
27283static bool
27284ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
27285 optimization_type opt_type)
27286{
27287 switch (op)
27288 {
27289 case asin_optab:
27290 case acos_optab:
27291 case log1p_optab:
27292 case exp_optab:
27293 case exp10_optab:
27294 case exp2_optab:
27295 case expm1_optab:
27296 case ldexp_optab:
27297 case scalb_optab:
27298 case round_optab:
27299 case lround_optab:
27300 return opt_type == OPTIMIZE_FOR_SPEED;
27301
27302 case rint_optab:
27303 if (SSE_FLOAT_MODE_P (mode1)
27304 && TARGET_SSE_MATH
27305 && !flag_trapping_math
27306 && !TARGET_SSE4_1
27307 && mode1 != HFmode)
27308 return opt_type == OPTIMIZE_FOR_SPEED;
27309 return true;
27310
27311 case floor_optab:
27312 case ceil_optab:
27313 case btrunc_optab:
27314 if ((SSE_FLOAT_MODE_P (mode1)
27315 && TARGET_SSE_MATH
27316 && TARGET_SSE4_1)
27317 || mode1 == HFmode)
27318 return true;
27319 return opt_type == OPTIMIZE_FOR_SPEED;
27320
27321 case rsqrt_optab:
27322 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode: mode1);
27323
27324 default:
27325 return true;
27326 }
27327}
27328
27329/* Address space support.
27330
27331 This is not "far pointers" in the 16-bit sense, but an easy way
27332 to use %fs and %gs segment prefixes. Therefore:
27333
27334 (a) All address spaces have the same modes,
27335 (b) All address spaces have the same addresss forms,
27336 (c) While %fs and %gs are technically subsets of the generic
27337 address space, they are probably not subsets of each other.
27338 (d) Since we have no access to the segment base register values
27339 without resorting to a system call, we cannot convert a
27340 non-default address space to a default address space.
27341 Therefore we do not claim %fs or %gs are subsets of generic.
27342
27343 Therefore we can (mostly) use the default hooks. */
27344
27345/* All use of segmentation is assumed to make address 0 valid. */
27346
27347static bool
27348ix86_addr_space_zero_address_valid (addr_space_t as)
27349{
27350 return as != ADDR_SPACE_GENERIC;
27351}
27352
27353static void
27354ix86_init_libfuncs (void)
27355{
27356 if (TARGET_64BIT)
27357 {
27358 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
27359 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
27360 }
27361 else
27362 {
27363 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
27364 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
27365 }
27366
27367#if TARGET_MACHO
27368 darwin_rename_builtins ();
27369#endif
27370}
27371
/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return TARGET_AVX512FP16
	     ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	     : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
	 in a standards compliant mode, and the implicit precision we
	 provide would be identical were it not for the unpredictable
	 cases.  */
      if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      else if (!TARGET_80387)
	/* No x87 at all: SSE arithmetic rounds correctly per mode.  */
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
	{
	  if (!(TARGET_SSE && TARGET_SSE_MATH))
	    /* Pure x87 math computes in extended precision.  */
	    return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	  else if (TARGET_SSE2)
	    return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	}

      /* If we are in standards compliant mode, but we know we will
	 calculate in unpredictable precision, return
	 FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	 excess precision if the target can't guarantee it will honor
	 it.  */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
	      : FLT_EVAL_METHOD_UNPREDICTABLE);
    case EXCESS_PRECISION_TYPE_FLOAT16:
      if (TARGET_80387
	  && !(TARGET_SSE_MATH && TARGET_SSE))
	error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }

  /* Not reached; all switch cases return or abort.  */
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
27427
27428/* Return true if _BitInt(N) is supported and fill its details into *INFO. */
27429bool
27430ix86_bitint_type_info (int n, struct bitint_info *info)
27431{
27432 if (n <= 8)
27433 info->limb_mode = QImode;
27434 else if (n <= 16)
27435 info->limb_mode = HImode;
27436 else if (n <= 32 || (!TARGET_64BIT && n > 64))
27437 info->limb_mode = SImode;
27438 else
27439 info->limb_mode = DImode;
27440 info->abi_limb_mode = info->limb_mode;
27441 info->big_endian = false;
27442 info->extended = false;
27443 return true;
27444}
27445
27446/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, TFmode
27447 or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
27448 based on long double bits, go with the default one for the others. */
27449
27450static machine_mode
27451ix86_c_mode_for_floating_type (enum tree_index ti)
27452{
27453 if (ti == TI_LONG_DOUBLE_TYPE)
27454 return (TARGET_LONG_DOUBLE_64 ? DFmode
27455 : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
27456 return default_mode_for_floating_type (ti);
27457}
27458
27459/* Returns modified FUNCTION_TYPE for cdtor callabi. */
27460tree
27461ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
27462{
27463 if (TARGET_64BIT
27464 || TARGET_RTD
27465 || ix86_function_type_abi (fntype) != MS_ABI)
27466 return fntype;
27467 /* For 32-bit MS ABI add thiscall attribute. */
27468 tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
27469 TYPE_ATTRIBUTES (fntype));
27470 return build_type_attribute_variant (fntype, attribs);
27471}
27472
/* Implement PUSH_ROUNDING.  On 386, we have pushw instruction that
   decrements by exactly 2 no matter what the position was, there is no pushb.

   But as CIE data alignment factor on this arch is -4 for 32bit targets
   and -8 for 64bit targets, we need to make sure all stack pointer adjustments
   are in multiple of 4 for 32bit targets and 8 for 64bit targets.  */

poly_int64
ix86_push_rounding (poly_int64 bytes)
{
  /* Round each push up to a full word (4 or 8 bytes).  */
  return ROUND_UP (bytes, UNITS_PER_WORD);
}
27485
/* Use 8 bits of metadata starting from bit 48 for LAM_U48,
   6 bits of metadata starting from bit 57 for LAM_U57.  */
#define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48		\
			   ? 48					\
			   : (ix86_lam_type == lam_u57 ? 57 : 0))
#define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48		\
			      ? 8				\
			      : (ix86_lam_type == lam_u57 ? 6 : 0))
27494
27495/* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
27496bool
27497ix86_memtag_can_tag_addresses ()
27498{
27499 return ix86_lam_type != lam_none && TARGET_LP64;
27500}
27501
/* Implement TARGET_MEMTAG_TAG_BITSIZE.  Returns 8 for LAM_U48, 6 for
   LAM_U57, 0 when LAM is disabled (per IX86_HWASAN_TAG_SIZE).  */
unsigned char
ix86_memtag_tag_bitsize ()
{
  return IX86_HWASAN_TAG_SIZE;
}
27508
/* Implement TARGET_MEMTAG_SET_TAG.
   OR the (shifted) TAG into the upper bits of UNTAGGED, returning the
   tagged pointer (in TARGET when possible).  */
rtx
ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
{
  /* default_memtag_insert_random_tag may
     generate tag with value more than 6 bits.  */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;

      emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
    }
  /* Move the tag into its bit position, then merge it into the pointer.  */
  tag = expand_simple_binop (Pmode, ASHIFT, tag,
			     GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
			     /* unsignedp = */1, OPTAB_WIDEN);
  rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
				 /* unsignedp = */1, OPTAB_DIRECT);
  return ret;
}
27529
/* Implement TARGET_MEMTAG_EXTRACT_TAG.
   Shift TAGGED_POINTER right so the tag lands in the low bits and
   return it as a QImode value.  */
rtx
ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
{
  rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
				 GEN_INT (IX86_HWASAN_SHIFT), target,
				 /* unsignedp = */0,
				 OPTAB_DIRECT);
  rtx ret = gen_reg_rtx (QImode);
  /* Mask off bit63 when LAM_U57.  */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
      emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
			     gen_int_mode (and_imm, QImode)));
    }
  else
    emit_move_insn (ret, gen_lowpart (QImode, tag));
  return ret;
}
27551
/* Implement TARGET_MEMTAG_UNTAGGED_POINTER.
   Clear the tag bits of TAGGED_POINTER (keeping bit 63) and return the
   canonical pointer.  */
rtx
ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
{
  /* Leave bit63 alone.  */
  rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
				+ (HOST_WIDE_INT_1U << 63) - 1),
			       Pmode);
  rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
					   tag_mask, target, true,
					   OPTAB_DIRECT);
  gcc_assert (untagged_base);
  return untagged_base;
}
27566
/* Implement TARGET_MEMTAG_ADD_TAG.
   Return BASE + OFFSET with its tag incremented by TAG_OFFSET, wrapping
   the tag within IX86_HWASAN_TAG_SIZE bits.  */
rtx
ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
{
  rtx base_tag = gen_reg_rtx (QImode);
  rtx base_addr = gen_reg_rtx (Pmode);
  rtx tagged_addr = gen_reg_rtx (Pmode);
  rtx new_tag = gen_reg_rtx (QImode);
  unsigned HOST_WIDE_INT and_imm
    = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;

  /* When there's "overflow" in tag adding,
     need to mask the most significant bit off.  */
  emit_move_insn (base_tag, ix86_memtag_extract_tag (tagged_pointer: base, NULL_RTX));
  emit_move_insn (base_addr,
		  ix86_memtag_untagged_pointer (tagged_pointer: base, NULL_RTX));
  emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
  emit_move_insn (new_tag, base_tag);
  emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
  /* Re-insert the wrapped tag and apply the byte offset last.  */
  emit_move_insn (tagged_addr,
		  ix86_memtag_set_tag (untagged: base_addr, tag: new_tag, NULL_RTX));
  return plus_constant (Pmode, tagged_addr, offset);
}
27590
27591/* Implement TARGET_HAVE_CCMP. */
27592static bool
27593ix86_have_ccmp ()
27594{
27595 return (bool) TARGET_APX_CCMP;
27596}
27597
27598/* Implement TARGET_MODE_CAN_TRANSFER_BITS. */
27599static bool
27600ix86_mode_can_transfer_bits (machine_mode mode)
27601{
27602 if (GET_MODE_CLASS (mode) == MODE_FLOAT
27603 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
27604 switch (GET_MODE_INNER (mode))
27605 {
27606 case E_SFmode:
27607 case E_DFmode:
27608 /* These suffer from normalization upon load when not using SSE. */
27609 return !(ix86_fpmath & FPMATH_387);
27610 default:
27611 return true;
27612 }
27613
27614 return true;
27615}
27616
27617/* Implement TARGET_REDZONE_CLOBBER. */
27618static rtx
27619ix86_redzone_clobber ()
27620{
27621 cfun->machine->asm_redzone_clobber_seen = true;
27622 if (ix86_using_red_zone ())
27623 {
27624 rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
27625 rtx mem = gen_rtx_MEM (BLKmode, base);
27626 set_mem_size (mem, RED_ZONE_SIZE);
27627 return mem;
27628 }
27629 return NULL_RTX;
27630}
27631
27632/* Target-specific selftests. */
27633
27634#if CHECKING_P
27635
27636namespace selftest {
27637
/* Verify that hard regs are dumped as expected (in compact mode).
   Regnos 0 and 1 are %eax and %edx on i386.  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}
27646
/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  /* Reset the insn chain so the dump is self-contained.  */
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (x: pat);

  /* Verify that the repeated references to the SCRATCH show use
     reuse IDS.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}
27670
/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file (path: "x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (uid: 1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated.  */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32.  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}
27740
/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", where requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".   */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    /* Exactly two notes are expected.  */
    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}
27789
/* Verify that the RTL loader copes a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  /* Spot-check a few insns by uid: a NOTE, a PARALLEL insn, a USE.  */
  rtx_insn *insn_1 = get_insn_by_uid (uid: 1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (uid: 7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (uid: 15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}
27817
27818/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
27819 In particular, verify that it correctly loads the 2nd operand.
27820 This test is target-specific since these are machine-specific
27821 operands (and enums). */
27822
27823static void
27824ix86_test_loading_unspec ()
27825{
27826 rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/unspec.rtl"));
27827
27828 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
27829
27830 ASSERT_TRUE (cfun);
27831
27832 /* Test of an UNSPEC. */
27833 rtx_insn *insn = get_insns ();
27834 ASSERT_EQ (INSN, GET_CODE (insn));
27835 rtx set = single_set (insn);
27836 ASSERT_NE (NULL, set);
27837 rtx dst = SET_DEST (set);
27838 ASSERT_EQ (MEM, GET_CODE (dst));
27839 rtx src = SET_SRC (set);
27840 ASSERT_EQ (UNSPEC, GET_CODE (src));
27841 ASSERT_EQ (BLKmode, GET_MODE (src));
27842 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
27843
27844 rtx v0 = XVECEXP (src, 0, 0);
27845
27846 /* Verify that the two uses of the first SCRATCH have pointer
27847 equality. */
27848 rtx scratch_a = XEXP (dst, 0);
27849 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
27850
27851 rtx scratch_b = XEXP (v0, 0);
27852 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
27853
27854 ASSERT_EQ (scratch_a, scratch_b);
27855
27856 /* Verify that the two mems are thus treated as equal. */
27857 ASSERT_TRUE (rtx_equal_p (dst, v0));
27858
27859 /* Verify that the insn is recognized. */
27860 ASSERT_NE(-1, recog_memoized (insn));
27861
27862 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
27863 insn = NEXT_INSN (insn);
27864 ASSERT_EQ (INSN, GET_CODE (insn));
27865
27866 set = single_set (insn);
27867 ASSERT_NE (NULL, set);
27868
27869 src = SET_SRC (set);
27870 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
27871 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
27872}
27873
27874/* Run all target-specific selftests. */
27875
static void
ix86_run_selftests (void)
{
  /* Tests of dumping RTL containing target-specific content
     (hard registers and the memory-blockage unspec).  */
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs). */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}
27889
27890} // namespace selftest
27891
27892#endif /* CHECKING_P */
27893
/* Table of machine-specific attribute specifications, used by
   TARGET_ATTRIBUTE_TABLE below.  Currently only the GNU-syntax
   attributes are registered for this target.  */
static const scoped_attribute_specs *const ix86_attribute_table[] =
{
  &ix86_gnu_attribute_table
};
27898
27899/* Initialize the GCC target structure. */
27900#undef TARGET_RETURN_IN_MEMORY
27901#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
27902
27903#undef TARGET_LEGITIMIZE_ADDRESS
27904#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
27905
27906#undef TARGET_ATTRIBUTE_TABLE
27907#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
27908#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
27909#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
27910#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27911# undef TARGET_MERGE_DECL_ATTRIBUTES
27912# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
27913#endif
27914
27915#undef TARGET_INVALID_CONVERSION
27916#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
27917
27918#undef TARGET_INVALID_UNARY_OP
27919#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
27920
27921#undef TARGET_INVALID_BINARY_OP
27922#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
27923
27924#undef TARGET_COMP_TYPE_ATTRIBUTES
27925#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
27926
27927#undef TARGET_INIT_BUILTINS
27928#define TARGET_INIT_BUILTINS ix86_init_builtins
27929#undef TARGET_BUILTIN_DECL
27930#define TARGET_BUILTIN_DECL ix86_builtin_decl
27931#undef TARGET_EXPAND_BUILTIN
27932#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
27933
27934#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
27935#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
27936 ix86_builtin_vectorized_function
27937
27938#undef TARGET_VECTORIZE_BUILTIN_GATHER
27939#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
27940
27941#undef TARGET_VECTORIZE_BUILTIN_SCATTER
27942#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
27943
27944#undef TARGET_BUILTIN_RECIPROCAL
27945#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
27946
27947#undef TARGET_ASM_FUNCTION_EPILOGUE
27948#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
27949
27950#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
27951#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
27952 ix86_print_patchable_function_entry
27953
27954#undef TARGET_ENCODE_SECTION_INFO
27955#ifndef SUBTARGET_ENCODE_SECTION_INFO
27956#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
27957#else
27958#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
27959#endif
27960
27961#undef TARGET_ASM_OPEN_PAREN
27962#define TARGET_ASM_OPEN_PAREN ""
27963#undef TARGET_ASM_CLOSE_PAREN
27964#define TARGET_ASM_CLOSE_PAREN ""
27965
27966#undef TARGET_ASM_BYTE_OP
27967#define TARGET_ASM_BYTE_OP ASM_BYTE
27968
27969#undef TARGET_ASM_ALIGNED_HI_OP
27970#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
27971#undef TARGET_ASM_ALIGNED_SI_OP
27972#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
27973#ifdef ASM_QUAD
27974#undef TARGET_ASM_ALIGNED_DI_OP
27975#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
27976#endif
27977
27978#undef TARGET_PROFILE_BEFORE_PROLOGUE
27979#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
27980
27981#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
27982#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
27983
27984#undef TARGET_ASM_UNALIGNED_HI_OP
27985#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
27986#undef TARGET_ASM_UNALIGNED_SI_OP
27987#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
27988#undef TARGET_ASM_UNALIGNED_DI_OP
27989#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
27990
27991#undef TARGET_PRINT_OPERAND
27992#define TARGET_PRINT_OPERAND ix86_print_operand
27993#undef TARGET_PRINT_OPERAND_ADDRESS
27994#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
27995#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
27996#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
27997#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
27998#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
27999
28000#undef TARGET_SCHED_INIT_GLOBAL
28001#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
28002#undef TARGET_SCHED_ADJUST_COST
28003#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
28004#undef TARGET_SCHED_ISSUE_RATE
28005#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
28006#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
28007#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
28008 ia32_multipass_dfa_lookahead
28009#undef TARGET_SCHED_MACRO_FUSION_P
28010#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
28011#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
28012#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
28013
28014#undef TARGET_FUNCTION_OK_FOR_SIBCALL
28015#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
28016
28017#undef TARGET_MEMMODEL_CHECK
28018#define TARGET_MEMMODEL_CHECK ix86_memmodel_check
28019
28020#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
28021#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
28022
28023#ifdef HAVE_AS_TLS
28024#undef TARGET_HAVE_TLS
28025#define TARGET_HAVE_TLS true
28026#endif
28027#undef TARGET_CANNOT_FORCE_CONST_MEM
28028#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
28029#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
28030#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
28031
28032#undef TARGET_DELEGITIMIZE_ADDRESS
28033#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
28034
28035#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
28036#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
28037
28038#undef TARGET_MS_BITFIELD_LAYOUT_P
28039#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
28040
28041#if TARGET_MACHO
28042#undef TARGET_BINDS_LOCAL_P
28043#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
28044#else
28045#undef TARGET_BINDS_LOCAL_P
28046#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
28047#endif
28048#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28049#undef TARGET_BINDS_LOCAL_P
28050#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
28051#endif
28052
28053#undef TARGET_ASM_OUTPUT_MI_THUNK
28054#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
28055#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
28056#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
28057
28058#undef TARGET_ASM_FILE_START
28059#define TARGET_ASM_FILE_START x86_file_start
28060
28061#undef TARGET_OPTION_OVERRIDE
28062#define TARGET_OPTION_OVERRIDE ix86_option_override
28063
28064#undef TARGET_REGISTER_MOVE_COST
28065#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
28066#undef TARGET_MEMORY_MOVE_COST
28067#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
28068#undef TARGET_RTX_COSTS
28069#define TARGET_RTX_COSTS ix86_rtx_costs
28070#undef TARGET_INSN_COST
28071#define TARGET_INSN_COST ix86_insn_cost
28072#undef TARGET_ADDRESS_COST
28073#define TARGET_ADDRESS_COST ix86_address_cost
28074
28075#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
28076#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
28077 ix86_use_by_pieces_infrastructure_p
28078
28079#undef TARGET_OVERLAP_OP_BY_PIECES_P
28080#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
28081
28082#undef TARGET_FLAGS_REGNUM
28083#define TARGET_FLAGS_REGNUM FLAGS_REG
28084#undef TARGET_FIXED_CONDITION_CODE_REGS
28085#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
28086#undef TARGET_CC_MODES_COMPATIBLE
28087#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
28088
28089#undef TARGET_MACHINE_DEPENDENT_REORG
28090#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
28091
28092#undef TARGET_BUILD_BUILTIN_VA_LIST
28093#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
28094
28095#undef TARGET_FOLD_BUILTIN
28096#define TARGET_FOLD_BUILTIN ix86_fold_builtin
28097
28098#undef TARGET_GIMPLE_FOLD_BUILTIN
28099#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
28100
28101#undef TARGET_COMPARE_VERSION_PRIORITY
28102#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
28103
28104#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
28105#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
28106 ix86_generate_version_dispatcher_body
28107
28108#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
28109#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
28110 ix86_get_function_versions_dispatcher
28111
28112#undef TARGET_ENUM_VA_LIST_P
28113#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
28114
28115#undef TARGET_FN_ABI_VA_LIST
28116#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
28117
28118#undef TARGET_CANONICAL_VA_LIST_TYPE
28119#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
28120
28121#undef TARGET_EXPAND_BUILTIN_VA_START
28122#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
28123
28124#undef TARGET_MD_ASM_ADJUST
28125#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
28126
28127#undef TARGET_C_EXCESS_PRECISION
28128#define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
28129#undef TARGET_C_BITINT_TYPE_INFO
28130#define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
28131#undef TARGET_C_MODE_FOR_FLOATING_TYPE
28132#define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
28133#undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
28134#define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
28135#undef TARGET_PROMOTE_PROTOTYPES
28136#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
28137#undef TARGET_PUSH_ARGUMENT
28138#define TARGET_PUSH_ARGUMENT ix86_push_argument
28139#undef TARGET_SETUP_INCOMING_VARARGS
28140#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
28141#undef TARGET_MUST_PASS_IN_STACK
28142#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
28143#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
28144#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
28145#undef TARGET_FUNCTION_ARG_ADVANCE
28146#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
28147#undef TARGET_FUNCTION_ARG
28148#define TARGET_FUNCTION_ARG ix86_function_arg
28149#undef TARGET_INIT_PIC_REG
28150#define TARGET_INIT_PIC_REG ix86_init_pic_reg
28151#undef TARGET_USE_PSEUDO_PIC_REG
28152#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
28153#undef TARGET_FUNCTION_ARG_BOUNDARY
28154#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
28155#undef TARGET_PASS_BY_REFERENCE
28156#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
28157#undef TARGET_INTERNAL_ARG_POINTER
28158#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
28159#undef TARGET_UPDATE_STACK_BOUNDARY
28160#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
28161#undef TARGET_GET_DRAP_RTX
28162#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
28163#undef TARGET_STRICT_ARGUMENT_NAMING
28164#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
28165#undef TARGET_STATIC_CHAIN
28166#define TARGET_STATIC_CHAIN ix86_static_chain
28167#undef TARGET_TRAMPOLINE_INIT
28168#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
28169#undef TARGET_RETURN_POPS_ARGS
28170#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
28171
28172#undef TARGET_WARN_FUNC_RETURN
28173#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
28174
28175#undef TARGET_LEGITIMATE_COMBINED_INSN
28176#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
28177
28178#undef TARGET_ASAN_SHADOW_OFFSET
28179#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
28180
28181#undef TARGET_GIMPLIFY_VA_ARG_EXPR
28182#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
28183
28184#undef TARGET_SCALAR_MODE_SUPPORTED_P
28185#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
28186
28187#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
28188#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
28189ix86_libgcc_floating_mode_supported_p
28190
28191#undef TARGET_VECTOR_MODE_SUPPORTED_P
28192#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
28193
28194#undef TARGET_C_MODE_FOR_SUFFIX
28195#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
28196
28197#ifdef HAVE_AS_TLS
28198#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
28199#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
28200#endif
28201
28202#ifdef SUBTARGET_INSERT_ATTRIBUTES
28203#undef TARGET_INSERT_ATTRIBUTES
28204#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
28205#endif
28206
28207#undef TARGET_MANGLE_TYPE
28208#define TARGET_MANGLE_TYPE ix86_mangle_type
28209
28210#undef TARGET_EMIT_SUPPORT_TINFOS
28211#define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
28212
28213#undef TARGET_STACK_PROTECT_GUARD
28214#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
28215
28216#undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
28217#define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
28218 ix86_stack_protect_runtime_enabled_p
28219
28220#if !TARGET_MACHO
28221#undef TARGET_STACK_PROTECT_FAIL
28222#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
28223#endif
28224
28225#undef TARGET_FUNCTION_VALUE
28226#define TARGET_FUNCTION_VALUE ix86_function_value
28227
28228#undef TARGET_FUNCTION_VALUE_REGNO_P
28229#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
28230
28231#undef TARGET_ZERO_CALL_USED_REGS
28232#define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
28233
28234#undef TARGET_PROMOTE_FUNCTION_MODE
28235#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
28236
28237#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
28238#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
28239
28240#undef TARGET_MEMBER_TYPE_FORCES_BLK
28241#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
28242
28243#undef TARGET_INSTANTIATE_DECLS
28244#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
28245
28246#undef TARGET_SECONDARY_RELOAD
28247#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
28248#undef TARGET_SECONDARY_MEMORY_NEEDED
28249#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
28250#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
28251#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
28252
28253#undef TARGET_CLASS_MAX_NREGS
28254#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
28255
28256#undef TARGET_PREFERRED_RELOAD_CLASS
28257#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
28258#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
28259#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
28260/* When this hook returns true for MODE, the compiler allows
28261 registers explicitly used in the rtl to be used as spill registers
28262 but prevents the compiler from extending the lifetime of these
28263 registers. */
28264#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
28265#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
28266#undef TARGET_CLASS_LIKELY_SPILLED_P
28267#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
28268#undef TARGET_CALLEE_SAVE_COST
28269#define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
28270
28271#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
28272#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
28273 ix86_builtin_vectorization_cost
28274#undef TARGET_VECTORIZE_VEC_PERM_CONST
28275#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
28276#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
28277#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
28278 ix86_preferred_simd_mode
28279#undef TARGET_VECTORIZE_SPLIT_REDUCTION
28280#define TARGET_VECTORIZE_SPLIT_REDUCTION \
28281 ix86_split_reduction
28282#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
28283#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
28284 ix86_autovectorize_vector_modes
28285#undef TARGET_VECTORIZE_GET_MASK_MODE
28286#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
28287#undef TARGET_VECTORIZE_CREATE_COSTS
28288#define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
28289
28290#undef TARGET_SET_CURRENT_FUNCTION
28291#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
28292
28293#undef TARGET_OPTION_VALID_ATTRIBUTE_P
28294#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
28295
28296#undef TARGET_OPTION_SAVE
28297#define TARGET_OPTION_SAVE ix86_function_specific_save
28298
28299#undef TARGET_OPTION_RESTORE
28300#define TARGET_OPTION_RESTORE ix86_function_specific_restore
28301
28302#undef TARGET_OPTION_POST_STREAM_IN
28303#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
28304
28305#undef TARGET_OPTION_PRINT
28306#define TARGET_OPTION_PRINT ix86_function_specific_print
28307
28308#undef TARGET_CAN_INLINE_P
28309#define TARGET_CAN_INLINE_P ix86_can_inline_p
28310
28311#undef TARGET_LEGITIMATE_ADDRESS_P
28312#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
28313
28314#undef TARGET_REGISTER_PRIORITY
28315#define TARGET_REGISTER_PRIORITY ix86_register_priority
28316
28317#undef TARGET_REGISTER_USAGE_LEVELING_P
28318#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
28319
28320#undef TARGET_LEGITIMATE_CONSTANT_P
28321#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
28322
28323#undef TARGET_COMPUTE_FRAME_LAYOUT
28324#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
28325
28326#undef TARGET_FRAME_POINTER_REQUIRED
28327#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
28328
28329#undef TARGET_CAN_ELIMINATE
28330#define TARGET_CAN_ELIMINATE ix86_can_eliminate
28331
28332#undef TARGET_EXTRA_LIVE_ON_ENTRY
28333#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
28334
28335#undef TARGET_ASM_CODE_END
28336#define TARGET_ASM_CODE_END ix86_code_end
28337
28338#undef TARGET_CONDITIONAL_REGISTER_USAGE
28339#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
28340
28341#undef TARGET_CANONICALIZE_COMPARISON
28342#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
28343
28344#undef TARGET_LOOP_UNROLL_ADJUST
28345#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
28346
28347/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
28348#undef TARGET_SPILL_CLASS
28349#define TARGET_SPILL_CLASS ix86_spill_class
28350
28351#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
28352#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
28353 ix86_simd_clone_compute_vecsize_and_simdlen
28354
28355#undef TARGET_SIMD_CLONE_ADJUST
28356#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
28357
28358#undef TARGET_SIMD_CLONE_USABLE
28359#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
28360
28361#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
28362#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
28363
28364#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
28365#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
28366 ix86_float_exceptions_rounding_supported_p
28367
28368#undef TARGET_MODE_EMIT
28369#define TARGET_MODE_EMIT ix86_emit_mode_set
28370
28371#undef TARGET_MODE_NEEDED
28372#define TARGET_MODE_NEEDED ix86_mode_needed
28373
28374#undef TARGET_MODE_AFTER
28375#define TARGET_MODE_AFTER ix86_mode_after
28376
28377#undef TARGET_MODE_ENTRY
28378#define TARGET_MODE_ENTRY ix86_mode_entry
28379
28380#undef TARGET_MODE_EXIT
28381#define TARGET_MODE_EXIT ix86_mode_exit
28382
28383#undef TARGET_MODE_PRIORITY
28384#define TARGET_MODE_PRIORITY ix86_mode_priority
28385
28386#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
28387#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
28388
28389#undef TARGET_OFFLOAD_OPTIONS
28390#define TARGET_OFFLOAD_OPTIONS \
28391 ix86_offload_options
28392
28393#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
28394#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
28395
28396#undef TARGET_OPTAB_SUPPORTED_P
28397#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
28398
28399#undef TARGET_HARD_REGNO_SCRATCH_OK
28400#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
28401
28402#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
28403#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
28404
28405#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
28406#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
28407
28408#undef TARGET_INIT_LIBFUNCS
28409#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
28410
28411#undef TARGET_EXPAND_DIVMOD_LIBFUNC
28412#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
28413
28414#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
28415#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
28416
28417#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
28418#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
28419
28420#undef TARGET_HARD_REGNO_NREGS
28421#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
28422#undef TARGET_HARD_REGNO_MODE_OK
28423#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
28424
28425#undef TARGET_MODES_TIEABLE_P
28426#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
28427
28428#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
28429#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
28430 ix86_hard_regno_call_part_clobbered
28431
28432#undef TARGET_INSN_CALLEE_ABI
28433#define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
28434
28435#undef TARGET_CAN_CHANGE_MODE_CLASS
28436#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
28437
28438#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
28439#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
28440
28441#undef TARGET_STATIC_RTX_ALIGNMENT
28442#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
28443#undef TARGET_CONSTANT_ALIGNMENT
28444#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
28445
28446#undef TARGET_EMPTY_RECORD_P
28447#define TARGET_EMPTY_RECORD_P ix86_is_empty_record
28448
28449#undef TARGET_WARN_PARAMETER_PASSING_ABI
28450#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
28451
28452#undef TARGET_GET_MULTILIB_ABI_NAME
28453#define TARGET_GET_MULTILIB_ABI_NAME \
28454 ix86_get_multilib_abi_name
28455
28456#undef TARGET_IFUNC_REF_LOCAL_OK
28457#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
28458
28459#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
28460# undef TARGET_ASM_RELOC_RW_MASK
28461# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
28462#endif
28463
28464#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
28465#define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
28466
28467#undef TARGET_MEMTAG_ADD_TAG
28468#define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
28469
28470#undef TARGET_MEMTAG_SET_TAG
28471#define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
28472
28473#undef TARGET_MEMTAG_EXTRACT_TAG
28474#define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
28475
28476#undef TARGET_MEMTAG_UNTAGGED_POINTER
28477#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
28478
28479#undef TARGET_MEMTAG_TAG_BITSIZE
28480#define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
28481
28482#undef TARGET_GEN_CCMP_FIRST
28483#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
28484
28485#undef TARGET_GEN_CCMP_NEXT
28486#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
28487
28488#undef TARGET_HAVE_CCMP
28489#define TARGET_HAVE_CCMP ix86_have_ccmp
28490
28491#undef TARGET_MODE_CAN_TRANSFER_BITS
28492#define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits
28493
28494#undef TARGET_REDZONE_CLOBBER
28495#define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
28496
28497static bool
28498ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
28499{
28500#ifdef OPTION_GLIBC
28501 if (OPTION_GLIBC)
28502 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
28503 else
28504 return false;
28505#else
28506 return false;
28507#endif
28508}
28509
28510#undef TARGET_LIBC_HAS_FAST_FUNCTION
28511#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
28512
28513static unsigned
28514ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
28515 bool boundary_p)
28516{
28517#ifdef OPTION_GLIBC
28518 bool glibc_p = OPTION_GLIBC;
28519#else
28520 bool glibc_p = false;
28521#endif
28522 if (glibc_p)
28523 {
28524 /* If __FAST_MATH__ is defined, glibc provides libmvec. */
28525 unsigned int libmvec_ret = 0;
28526 if (!flag_trapping_math
28527 && flag_unsafe_math_optimizations
28528 && flag_finite_math_only
28529 && !flag_signed_zeros
28530 && !flag_errno_math)
28531 switch (cfn)
28532 {
28533 CASE_CFN_COS:
28534 CASE_CFN_COS_FN:
28535 CASE_CFN_SIN:
28536 CASE_CFN_SIN_FN:
28537 if (!boundary_p)
28538 {
28539 /* With non-default rounding modes, libmvec provides
28540 complete garbage in results. E.g.
28541 _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
28542 returns 0.00333309174f rather than 1.40129846e-45f. */
28543 if (flag_rounding_math)
28544 return ~0U;
28545 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
28546 claims libmvec maximum error is 4ulps.
28547 My own random testing indicates 2ulps for SFmode and
28548 0.5ulps for DFmode, but let's go with the 4ulps. */
28549 libmvec_ret = 4;
28550 }
28551 break;
28552 default:
28553 break;
28554 }
28555 unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
28556 boundary_p);
28557 return MAX (ret, libmvec_ret);
28558 }
28559 return default_libm_function_max_error (cfn, mode, boundary_p);
28560}
28561
28562#undef TARGET_LIBM_FUNCTION_MAX_ERROR
28563#define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
28564
28565#if TARGET_MACHO
28566static bool
28567ix86_cannot_copy_insn_p (rtx_insn *insn)
28568{
28569 if (TARGET_64BIT)
28570 return false;
28571
28572 rtx set = single_set (insn);
28573 if (set)
28574 {
28575 rtx src = SET_SRC (set);
28576 if (GET_CODE (src) == UNSPEC
28577 && XINT (src, 1) == UNSPEC_SET_GOT)
28578 return true;
28579 }
28580 return false;
28581}
28582
28583#undef TARGET_CANNOT_COPY_INSN_P
28584#define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
28585
28586#endif
28587
28588#if CHECKING_P
28589#undef TARGET_RUN_TARGET_SELFTESTS
28590#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
28591#endif /* #if CHECKING_P */
28592
28593#undef TARGET_DOCUMENTATION_NAME
28594#define TARGET_DOCUMENTATION_NAME "x86"
28595
28596/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
28597sbitmap
28598ix86_get_separate_components (void)
28599{
28600 HOST_WIDE_INT offset, to_allocate;
28601 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
28602 bitmap_clear (components);
28603 struct machine_function *m = cfun->machine;
28604
28605 offset = m->frame.stack_pointer_offset;
28606 to_allocate = offset - m->frame.sse_reg_save_offset;
28607
28608 /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
28609 Experiments show that APX PPX can speed up the prologue. If the function
28610 does not exit early during actual execution, then using APX PPX is faster.
28611 If the function always exits early during actual execution, then shrink
28612 wrap separate reduces the number of MOV (PUSH/POP) instructions actually
28613 executed, thus speeding up execution.
28614 foo:
28615 movl $1, %eax
28616 testq %rdi, %rdi
28617 jne.L60
28618 ret ---> early return.
28619 .L60:
28620 subq $88, %rsp ---> belong to prologue.
28621 xorl %eax, %eax
28622 movq %rbx, 40 (%rsp) ---> belong to prologue.
28623 movq 8 (%rdi), %rbx
28624 movq %rbp, 48 (%rsp) ---> belong to prologue.
28625 movq %rdi, %rbp
28626 testq %rbx, %rbx
28627 jne.L61
28628 movq 40 (%rsp), %rbx
28629 movq 48 (%rsp), %rbp
28630 addq $88, %rsp
28631 ret
28632 .L61:
28633 movq %r12, 56 (%rsp) ---> belong to prologue.
28634 movq %r13, 64 (%rsp) ---> belong to prologue.
28635 movq %r14, 72 (%rsp) ---> belong to prologue.
28636 ... ...
28637
28638 Disable shrink wrap separate when PPX is enabled. */
28639 if ((TARGET_APX_PPX && !crtl->calls_eh_return)
28640 || cfun->machine->func_type != TYPE_NORMAL
28641 || TARGET_SEH
28642 || crtl->stack_realign_needed
28643 || m->call_ms2sysv)
28644 return components;
28645
28646 /* Since shrink wrapping separate uses MOV instead of PUSH/POP.
28647 Disable shrink wrap separate when MOV is prohibited. */
28648 if (save_regs_using_push_pop (to_allocate))
28649 return components;
28650
28651 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28652 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
28653 {
28654 /* Skip registers with large offsets, where a pseudo may be needed. */
28655 if (IN_RANGE (offset, -0x8000, 0x7fff))
28656 bitmap_set_bit (map: components, bitno: regno);
28657 offset += UNITS_PER_WORD;
28658 }
28659
28660 /* Don't mess with the following registers. */
28661 if (frame_pointer_needed)
28662 bitmap_clear_bit (map: components, HARD_FRAME_POINTER_REGNUM);
28663
28664 if (crtl->drap_reg)
28665 bitmap_clear_bit (map: components, REGNO (crtl->drap_reg));
28666
28667 if (pic_offset_table_rtx)
28668 bitmap_clear_bit (map: components, REAL_PIC_OFFSET_TABLE_REGNUM);
28669
28670 return components;
28671}
28672
28673/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
28674sbitmap
28675ix86_components_for_bb (basic_block bb)
28676{
28677 bitmap in = DF_LIVE_IN (bb);
28678 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
28679 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
28680
28681 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
28682 bitmap_clear (components);
28683
28684 function_abi_aggregator callee_abis;
28685 rtx_insn *insn;
28686 FOR_BB_INSNS (bb, insn)
28687 if (CALL_P (insn))
28688 callee_abis.note_callee_abi (abi: insn_callee_abi (insn));
28689 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
28690
28691 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
28692 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28693 if (!fixed_regs[regno]
28694 && (TEST_HARD_REG_BIT (set: extra_caller_saves, bit: regno)
28695 || bitmap_bit_p (in, regno)
28696 || bitmap_bit_p (gen, regno)
28697 || bitmap_bit_p (kill, regno)))
28698 bitmap_set_bit (map: components, bitno: regno);
28699
28700 return components;
28701}
28702
/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  x86 never needs
   to disqualify components on a per-edge basis, so this hook is an
   intentional no-op; the parameters (components, edge, edge components,
   is-prologue flag) are deliberately unnamed because they are unused.  */
void
ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
{
  /* Nothing to do for x86. */
}
28709
28710/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
28711void
28712ix86_emit_prologue_components (sbitmap components)
28713{
28714 HOST_WIDE_INT cfa_offset;
28715 struct machine_function *m = cfun->machine;
28716
28717 cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
28718 - m->frame.stack_pointer_offset;
28719 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28720 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
28721 {
28722 if (bitmap_bit_p (map: components, bitno: regno))
28723 ix86_emit_save_reg_using_mov (mode: word_mode, regno, cfa_offset);
28724 cfa_offset -= UNITS_PER_WORD;
28725 }
28726}
28727
28728/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
28729void
28730ix86_emit_epilogue_components (sbitmap components)
28731{
28732 HOST_WIDE_INT cfa_offset;
28733 struct machine_function *m = cfun->machine;
28734 cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
28735 - m->frame.stack_pointer_offset;
28736
28737 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28738 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
28739 {
28740 if (bitmap_bit_p (map: components, bitno: regno))
28741 {
28742 rtx reg = gen_rtx_REG (word_mode, regno);
28743 rtx mem;
28744 rtx_insn *insn;
28745
28746 mem = choose_baseaddr (cfa_offset, NULL);
28747 mem = gen_frame_mem (word_mode, mem);
28748 insn = emit_move_insn (reg, mem);
28749
28750 RTX_FRAME_RELATED_P (insn) = 1;
28751 add_reg_note (insn, REG_CFA_RESTORE, reg);
28752 }
28753 cfa_offset -= UNITS_PER_WORD;
28754 }
28755}
28756
28757/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
28758void
28759ix86_set_handled_components (sbitmap components)
28760{
28761 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
28762 if (bitmap_bit_p (map: components, bitno: regno))
28763 {
28764 cfun->machine->reg_is_wrapped_separately[regno] = true;
28765 cfun->machine->use_fast_prologue_epilogue = true;
28766 cfun->machine->frame.save_regs_using_mov = true;
28767 }
28768}
28769
/* Wire the shrink-wrap-separate implementations above into the target
   hook vector.  */
#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  ix86_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  ix86_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components

/* The one definition of the target hook vector, built from all the
   TARGET_* macros defined throughout this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Garbage-collector roots generated by gengtype for this file.  */
#include "gt-i386.h"
28788

/* Source: gcc/config/i386/i386.cc.  */