| 1 | /* Subroutines used for code generation on IA-32. |
| 2 | Copyright (C) 1988-2026 Free Software Foundation, Inc. |
| 3 | |
| 4 | This file is part of GCC. |
| 5 | |
| 6 | GCC is free software; you can redistribute it and/or modify |
| 7 | it under the terms of the GNU General Public License as published by |
| 8 | the Free Software Foundation; either version 3, or (at your option) |
| 9 | any later version. |
| 10 | |
| 11 | GCC is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | GNU General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU General Public License |
| 17 | along with GCC; see the file COPYING3. If not see |
| 18 | <http://www.gnu.org/licenses/>. */ |
| 19 | |
| 20 | #define INCLUDE_STRING |
| 21 | #define IN_TARGET_CODE 1 |
| 22 | |
| 23 | #include "config.h" |
| 24 | #include "system.h" |
| 25 | #include "coretypes.h" |
| 26 | #include "backend.h" |
| 27 | #include "rtl.h" |
| 28 | #include "tree.h" |
| 29 | #include "memmodel.h" |
| 30 | #include "gimple.h" |
| 31 | #include "cfghooks.h" |
| 32 | #include "cfgloop.h" |
| 33 | #include "df.h" |
| 34 | #include "tm_p.h" |
| 35 | #include "stringpool.h" |
| 36 | #include "expmed.h" |
| 37 | #include "optabs.h" |
| 38 | #include "regs.h" |
| 39 | #include "emit-rtl.h" |
| 40 | #include "recog.h" |
| 41 | #include "cgraph.h" |
| 42 | #include "diagnostic.h" |
| 43 | #include "cfgbuild.h" |
| 44 | #include "alias.h" |
| 45 | #include "fold-const.h" |
| 46 | #include "attribs.h" |
| 47 | #include "calls.h" |
| 48 | #include "stor-layout.h" |
| 49 | #include "varasm.h" |
| 50 | #include "output.h" |
| 51 | #include "insn-attr.h" |
| 52 | #include "flags.h" |
| 53 | #include "except.h" |
| 54 | #include "explow.h" |
| 55 | #include "expr.h" |
| 56 | #include "cfgrtl.h" |
| 57 | #include "common/common-target.h" |
| 58 | #include "langhooks.h" |
| 59 | #include "reload.h" |
| 60 | #include "gimplify.h" |
| 61 | #include "dwarf2.h" |
| 62 | #include "tm-constrs.h" |
| 63 | #include "cselib.h" |
| 64 | #include "sched-int.h" |
| 65 | #include "opts.h" |
| 66 | #include "tree-pass.h" |
| 67 | #include "context.h" |
| 68 | #include "pass_manager.h" |
| 69 | #include "target-globals.h" |
| 70 | #include "gimple-iterator.h" |
| 71 | #include "gimple-fold.h" |
| 72 | #include "tree-vectorizer.h" |
| 73 | #include "shrink-wrap.h" |
| 74 | #include "builtins.h" |
| 75 | #include "rtl-iter.h" |
| 76 | #include "tree-iterator.h" |
| 77 | #include "dbgcnt.h" |
| 78 | #include "case-cfn-macros.h" |
| 79 | #include "dojump.h" |
| 80 | #include "fold-const-call.h" |
| 81 | #include "tree-vrp.h" |
| 82 | #include "tree-ssanames.h" |
| 83 | #include "selftest.h" |
| 84 | #include "selftest-rtl.h" |
| 85 | #include "print-rtl.h" |
| 86 | #include "intl.h" |
| 87 | #include "ifcvt.h" |
| 88 | #include "symbol-summary.h" |
| 89 | #include "sreal.h" |
| 90 | #include "ipa-cp.h" |
| 91 | #include "ipa-prop.h" |
| 92 | #include "ipa-fnsummary.h" |
| 93 | #include "wide-int-bitmask.h" |
| 94 | #include "tree-vector-builder.h" |
| 95 | #include "debug.h" |
| 96 | #include "dwarf2out.h" |
| 97 | #include "i386-options.h" |
| 98 | #include "i386-builtins.h" |
| 99 | #include "i386-expand.h" |
| 100 | #include "i386-features.h" |
| 101 | #include "function-abi.h" |
| 102 | #include "rtl-error.h" |
| 103 | #include "gimple-pretty-print.h" |
| 104 | |
| 105 | /* This file should be included last. */ |
| 106 | #include "target-def.h" |
| 107 | |
| 108 | static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); |
| 109 | static void ix86_emit_restore_reg_using_pop (rtx, bool = false); |
| 110 | |
| 111 | |
/* Stack-probe limit; a target OS header may override this.  -1 means
   no limit (no stack checking against a fixed bound).  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   Index 4 is used for any mode wider than DImode (e.g. TImode).  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
| 123 | |
| 124 | |
/* Cost table selected by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Cost table actually used for costing; set by -mtune, or by -Os
   (which switches to size-oriented costs).  */
const struct processor_costs *ix86_cost = NULL;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
| 135 | |
/* Assembler names for 8-bit low (al..), 8-bit high (ah..), and 16-bit
   (ax..) registers respectively, indexed by hard register number.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
| 140 | |
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  The entry order must
   match the hard register numbering in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  /* REX2 registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
| 181 | |
/* The "default" register map used in 32bit mode: maps GCC hard register
   numbers to debugger (DWARF) register numbers.  Registers with no
   32-bit debugger encoding are IGNORED_DWARF_REGNUM/INVALID_REGNUM.  */

unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
| 212 | |
/* The "default" register map used in 64bit mode: maps GCC hard register
   numbers to debugger (DWARF) register numbers per the x86-64 psABI.  */

unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
  130, 131, 132, 133, 134, 135, 136, 137,
  138, 139, 140, 141, 142, 143, 144, 145
};
| 242 | |
| 243 | /* Define the register numbers to be used in Dwarf debugging information. |
| 244 | The SVR4 reference port C compiler uses the following register numbers |
| 245 | in its Dwarf output code: |
| 246 | 0 for %eax (gcc regno = 0) |
| 247 | 1 for %ecx (gcc regno = 2) |
| 248 | 2 for %edx (gcc regno = 1) |
| 249 | 3 for %ebx (gcc regno = 3) |
| 250 | 4 for %esp (gcc regno = 7) |
| 251 | 5 for %ebp (gcc regno = 6) |
| 252 | 6 for %esi (gcc regno = 4) |
| 253 | 7 for %edi (gcc regno = 5) |
| 254 | The following three DWARF register numbers are never generated by |
| 255 | the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 |
| 256 | believed these numbers have these meanings. |
| 257 | 8 for %eip (no gcc equivalent) |
| 258 | 9 for %eflags (gcc regno = 17) |
| 259 | 10 for %trapno (no gcc equivalent) |
| 260 | It is not at all clear how we should number the FP stack registers |
| 261 | for the x86 architecture. If the version of SDB on x86/svr4 were |
| 262 | a bit less brain dead with respect to floating-point then we would |
| 263 | have a precedent to follow with respect to DWARF register numbers |
| 264 | for x86 FP registers, but the SDB on x86/svr4 was so completely |
| 265 | broken with respect to FP registers that it is hardly worth thinking |
| 266 | of it as something to strive for compatibility with. |
| 267 | The version of x86/svr4 SDB I had does (partially) |
| 268 | seem to believe that DWARF register number 11 is associated with |
| 269 | the x86 register %st(0), but that's about all. Higher DWARF |
| 270 | register numbers don't seem to be associated with anything in |
| 271 | particular, and even for DWARF regno 11, SDB only seemed to under- |
| 272 | stand that it should say that a variable lives in %st(0) (when |
| 273 | asked via an `=' command) if we said it was in DWARF regno 11, |
| 274 | but SDB still printed garbage when asked for the value of the |
| 275 | variable in question (via a `/' command). |
| 276 | (Also note that the labels SDB printed for various FP stack regs |
| 277 | when doing an `x' command were all wrong.) |
| 278 | Note that these problems generally don't affect the native SVR4 |
| 279 | C compiler because it doesn't allow the use of -O with -g and |
| 280 | because when it is *not* optimizing, it allocates a memory |
| 281 | location for each floating-point variable, and the memory |
| 282 | location is what gets described in the DWARF AT_location |
| 283 | attribute for the variable in question. |
| 284 | Regardless of the severe mental illness of the x86/svr4 SDB, we |
| 285 | do something sensible here and we use the following DWARF |
| 286 | register numbers. Note that these are all stack-top-relative |
| 287 | numbers. |
| 288 | 11 for %st(0) (gcc regno = 8) |
| 289 | 12 for %st(1) (gcc regno = 9) |
| 290 | 13 for %st(2) (gcc regno = 10) |
| 291 | 14 for %st(3) (gcc regno = 11) |
| 292 | 15 for %st(4) (gcc regno = 12) |
| 293 | 16 for %st(5) (gcc regno = 13) |
| 294 | 17 for %st(6) (gcc regno = 14) |
| 295 | 18 for %st(7) (gcc regno = 15) |
| 296 | */ |
/* SVR4 DWARF numbering; see the rationale in the comment above.  */
unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags (DWARF regno 9), fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
| 325 | |
/* Define parameter passing and return registers.  */

/* Integer argument registers, in order, for the SysV AMD64 ABI.  */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

/* Integer argument registers, in order, for the MS x64 ABI.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

/* Similar to Clang's preserve_none function parameter passing.
   NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p.  */

static int const x86_64_preserve_none_int_parameter_registers[6] =
{
  R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
};

/* Integer value-return registers.  */
static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
| 350 | |
/* Define the structure for the machine field in struct function.
   One entry per assigned local stack slot; entries form a singly
   linked list.  */

struct GTY(()) stack_local_entry {
  /* Machine mode of the slot (stored as a small integer).  */
  unsigned short mode;
  /* Slot number distinguishing multiple slots of the same mode.  */
  unsigned short n;
  /* RTL for the stack slot itself.  */
  rtx rtl;
  /* Next entry in the list, or NULL.  */
  struct stack_local_entry *next;
};
| 359 | |
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char ix86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line (takes precedence over the default below).  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits (the value actually
   in effect).  */
unsigned int ix86_incoming_stack_boundary;

/* True if there is no direct access to extern symbols.  */
bool ix86_has_no_direct_extern_access;

/* Calling abi specific va_list type nodes.  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
| 398 | |
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSEHF_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of eightbyte classes an argument can occupy.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  Lazily filled in;
   ext_80387_constants_init records whether it has been initialized.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init;
| 427 | |
| 428 | |
| 429 | static rtx ix86_function_value (const_tree, const_tree, bool); |
| 430 | static bool ix86_function_value_regno_p (const unsigned int); |
| 431 | static unsigned int ix86_function_arg_boundary (machine_mode, |
| 432 | const_tree); |
| 433 | static rtx ix86_static_chain (const_tree, bool); |
| 434 | static int ix86_function_regparm (const_tree, const_tree); |
| 435 | static void ix86_compute_frame_layout (void); |
| 436 | static tree ix86_canonical_va_list_type (tree); |
| 437 | static unsigned int split_stack_prologue_scratch_regno (void); |
| 438 | static bool i386_asm_output_addr_const_extra (FILE *, rtx); |
| 439 | |
| 440 | static bool ix86_can_inline_p (tree, tree); |
| 441 | static unsigned int ix86_minimum_incoming_stack_boundary (bool); |
| 442 | |
/* x86 condition codes (cc), in hardware encoding order.  */
typedef enum ix86_flags_cc
{
  X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
  X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
  X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
  X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
} ix86_cc;

/* APX CCMP/CTEST default flags value (dfv) strings, indexed by the
   ix86_flags_cc condition code above: the flags each condition reads.  */
static const char *ix86_ccmp_dfv_mapping[] =
{
  "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
  "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
};
| 458 | |
| 459 | |
/* Whether -mtune= or -march= were specified on the command line.  */
int ix86_tune_defaulted;
int ix86_arch_specified;
| 463 | |
| 464 | /* Return true if a red-zone is in use. We can't use red-zone when |
| 465 | there are local indirect jumps, like "indirect_jump" or "tablejump", |
| 466 | which jumps to another place in the function, since "call" in the |
| 467 | indirect thunk pushes the return address onto stack, destroying |
| 468 | red-zone. |
| 469 | |
| 470 | NB: Don't use red-zone for functions with no_caller_saved_registers |
| 471 | and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small |
| 472 | for 31 GPRs or 15 GPRs + 16 XMM registers. |
| 473 | |
| 474 | TODO: If we can reserve the first 2 WORDs, for PUSH and, another |
| 475 | for CALL, in red-zone, we can allow local indirect jumps with |
| 476 | indirect thunk. */ |
| 477 | |
| 478 | bool |
| 479 | ix86_using_red_zone (void) |
| 480 | { |
| 481 | return (TARGET_RED_ZONE |
| 482 | && !TARGET_64BIT_MS_ABI |
| 483 | && ((!TARGET_APX_EGPR && !TARGET_SSE) |
| 484 | || (cfun->machine->call_saved_registers |
| 485 | != TYPE_NO_CALLER_SAVED_REGISTERS)) |
| 486 | && (!cfun->machine->has_local_indirect_jump |
| 487 | || cfun->machine->indirect_branch_type == indirect_branch_keep)); |
| 488 | } |
| 489 | |
/* Return true if profiling code should be emitted before the
   prologue, false otherwise.
   Note: for x86, combining this with "hotfix" (hot-patching) is
   rejected with sorry().  */
| 493 | static bool |
| 494 | ix86_profile_before_prologue (void) |
| 495 | { |
| 496 | return flag_fentry != 0; |
| 497 | } |
| 498 | |
| 499 | /* Update register usage after having seen the compiler flags. */ |
| 500 | |
| 501 | static void |
| 502 | ix86_conditional_register_usage (void) |
| 503 | { |
| 504 | int i, c_mask; |
| 505 | |
| 506 | /* If there are no caller-saved registers, preserve all registers. |
| 507 | except fixed_regs and registers used for function return value |
| 508 | since aggregate_value_p checks call_used_regs[regno] on return |
| 509 | value. */ |
| 510 | if (cfun |
| 511 | && (cfun->machine->call_saved_registers |
| 512 | == TYPE_NO_CALLER_SAVED_REGISTERS)) |
| 513 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| 514 | if (!fixed_regs[i] && !ix86_function_value_regno_p (i)) |
| 515 | call_used_regs[i] = 0; |
| 516 | |
| 517 | /* For 32-bit targets, disable the REX registers. */ |
| 518 | if (! TARGET_64BIT) |
| 519 | { |
| 520 | for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) |
| 521 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
| 522 | for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
| 523 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
| 524 | for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
| 525 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
| 526 | } |
| 527 | |
| 528 | /* See the definition of CALL_USED_REGISTERS in i386.h. */ |
| 529 | c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI); |
| 530 | |
| 531 | CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); |
| 532 | |
| 533 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| 534 | { |
| 535 | /* Set/reset conditionally defined registers from |
| 536 | CALL_USED_REGISTERS initializer. */ |
| 537 | if (call_used_regs[i] > 1) |
| 538 | call_used_regs[i] = !!(call_used_regs[i] & c_mask); |
| 539 | |
| 540 | /* Calculate registers of CLOBBERED_REGS register set |
| 541 | as call used registers from GENERAL_REGS register set. */ |
| 542 | if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], bit: i) |
| 543 | && call_used_regs[i]) |
| 544 | SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], bit: i); |
| 545 | } |
| 546 | |
| 547 | /* If MMX is disabled, disable the registers. */ |
| 548 | if (! TARGET_MMX) |
| 549 | accessible_reg_set &= ~reg_class_contents[MMX_REGS]; |
| 550 | |
| 551 | /* If SSE is disabled, disable the registers. */ |
| 552 | if (! TARGET_SSE) |
| 553 | accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS]; |
| 554 | |
| 555 | /* If the FPU is disabled, disable the registers. */ |
| 556 | if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) |
| 557 | accessible_reg_set &= ~reg_class_contents[FLOAT_REGS]; |
| 558 | |
| 559 | /* If AVX512F is disabled, disable the registers. */ |
| 560 | if (! TARGET_AVX512F) |
| 561 | { |
| 562 | for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
| 563 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
| 564 | |
| 565 | accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS]; |
| 566 | } |
| 567 | |
| 568 | /* If APX is disabled, disable the registers. */ |
| 569 | if (! (TARGET_APX_EGPR && TARGET_64BIT)) |
| 570 | { |
| 571 | for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++) |
| 572 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
| 573 | } |
| 574 | } |
| 575 | |
| 576 | /* Canonicalize a comparison from one we don't have to one we do have. */ |
| 577 | |
| 578 | static void |
| 579 | ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1, |
| 580 | bool op0_preserve_value) |
| 581 | { |
| 582 | /* The order of operands in x87 ficom compare is forced by combine in |
| 583 | simplify_comparison () function. Float operator is treated as RTX_OBJ |
| 584 | with a precedence over other operators and is always put in the first |
| 585 | place. Swap condition and operands to match ficom instruction. */ |
| 586 | if (!op0_preserve_value |
| 587 | && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1)) |
| 588 | { |
| 589 | enum rtx_code scode = swap_condition ((enum rtx_code) *code); |
| 590 | |
| 591 | /* We are called only for compares that are split to SAHF instruction. |
| 592 | Ensure that we have setcc/jcc insn for the swapped condition. */ |
| 593 | if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN) |
| 594 | { |
| 595 | std::swap (a&: *op0, b&: *op1); |
| 596 | *code = (int) scode; |
| 597 | return; |
| 598 | } |
| 599 | } |
| 600 | |
| 601 | /* SUB (a, b) underflows precisely when a < b. Convert |
| 602 | (compare (minus (a b)) a) to (compare (a b)) |
| 603 | to match *sub<mode>_3 pattern. */ |
| 604 | if (!op0_preserve_value |
| 605 | && (*code == GTU || *code == LEU) |
| 606 | && GET_CODE (*op0) == MINUS |
| 607 | && rtx_equal_p (XEXP (*op0, 0), *op1)) |
| 608 | { |
| 609 | *op1 = XEXP (*op0, 1); |
| 610 | *op0 = XEXP (*op0, 0); |
| 611 | *code = (int) swap_condition ((enum rtx_code) *code); |
| 612 | return; |
| 613 | } |
| 614 | |
| 615 | /* Swap operands of GTU comparison to canonicalize |
| 616 | addcarry/subborrow comparison. */ |
| 617 | if (!op0_preserve_value |
| 618 | && *code == GTU |
| 619 | && GET_CODE (*op0) == PLUS |
| 620 | && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode) |
| 621 | && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND |
| 622 | && GET_CODE (*op1) == ZERO_EXTEND) |
| 623 | { |
| 624 | std::swap (a&: *op0, b&: *op1); |
| 625 | *code = (int) swap_condition ((enum rtx_code) *code); |
| 626 | return; |
| 627 | } |
| 628 | } |
| 629 | |
| 630 | /* Hook to determine if one function can safely inline another. */ |
| 631 | |
| 632 | static bool |
| 633 | ix86_can_inline_p (tree caller, tree callee) |
| 634 | { |
| 635 | tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); |
| 636 | tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); |
| 637 | |
| 638 | /* Changes of those flags can be tolerated for always inlines. Lets hope |
| 639 | user knows what he is doing. */ |
| 640 | unsigned HOST_WIDE_INT always_inline_safe_mask |
| 641 | = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS |
| 642 | | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD |
| 643 | | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD |
| 644 | | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS |
| 645 | | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE |
| 646 | | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER |
| 647 | | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER); |
| 648 | |
| 649 | |
| 650 | if (!callee_tree) |
| 651 | callee_tree = target_option_default_node; |
| 652 | if (!caller_tree) |
| 653 | caller_tree = target_option_default_node; |
| 654 | if (callee_tree == caller_tree) |
| 655 | return true; |
| 656 | |
| 657 | struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); |
| 658 | struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); |
| 659 | bool ret = false; |
| 660 | bool always_inline |
| 661 | = (DECL_DISREGARD_INLINE_LIMITS (callee) |
| 662 | && lookup_attribute (attr_name: "always_inline" , |
| 663 | DECL_ATTRIBUTES (callee))); |
| 664 | |
| 665 | /* If callee only uses GPRs, ignore MASK_80387. */ |
| 666 | if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags)) |
| 667 | always_inline_safe_mask |= MASK_80387; |
| 668 | |
| 669 | cgraph_node *callee_node = cgraph_node::get (decl: callee); |
| 670 | /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 |
| 671 | function can inline a SSE2 function but a SSE2 function can't inline |
| 672 | a SSE4 function. */ |
| 673 | if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) |
| 674 | != callee_opts->x_ix86_isa_flags) |
| 675 | || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2) |
| 676 | != callee_opts->x_ix86_isa_flags2)) |
| 677 | ret = false; |
| 678 | |
| 679 | /* See if we have the same non-isa options. */ |
| 680 | else if ((!always_inline |
| 681 | && caller_opts->x_target_flags != callee_opts->x_target_flags) |
| 682 | || (caller_opts->x_target_flags & ~always_inline_safe_mask) |
| 683 | != (callee_opts->x_target_flags & ~always_inline_safe_mask)) |
| 684 | ret = false; |
| 685 | |
| 686 | else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath |
| 687 | /* If the calle doesn't use FP expressions differences in |
| 688 | ix86_fpmath can be ignored. We are called from FEs |
| 689 | for multi-versioning call optimization, so beware of |
| 690 | ipa_fn_summaries not available. */ |
| 691 | && (! ipa_fn_summaries |
| 692 | || ipa_fn_summaries->get (node: callee_node) == NULL |
| 693 | || ipa_fn_summaries->get (node: callee_node)->fp_expressions)) |
| 694 | ret = false; |
| 695 | |
| 696 | /* At this point we cannot identify whether arch or tune setting |
| 697 | comes from target attribute or not. So the most conservative way |
| 698 | is to allow the callee that uses default arch and tune string to |
| 699 | be inlined. */ |
| 700 | else if (!strcmp (s1: callee_opts->x_ix86_arch_string, s2: "x86-64" ) |
| 701 | && !strcmp (s1: callee_opts->x_ix86_tune_string, s2: "generic" )) |
| 702 | ret = true; |
| 703 | |
| 704 | /* See if arch, tune, etc. are the same. As previous ISA flags already |
| 705 | checks if callee's ISA is subset of caller's, do not block |
| 706 | always_inline attribute for callee even it has different arch. */ |
| 707 | else if (!always_inline && caller_opts->arch != callee_opts->arch) |
| 708 | ret = false; |
| 709 | |
| 710 | else if (!always_inline && caller_opts->tune != callee_opts->tune) |
| 711 | ret = false; |
| 712 | |
| 713 | else if (!always_inline |
| 714 | && caller_opts->branch_cost != callee_opts->branch_cost) |
| 715 | ret = false; |
| 716 | |
| 717 | else |
| 718 | ret = true; |
| 719 | |
| 720 | return ret; |
| 721 | } |
| 722 | |
| 723 | /* Return true if this goes in large data/bss. */ |
| 724 | |
| 725 | static bool |
| 726 | ix86_in_large_data_p (tree exp) |
| 727 | { |
| 728 | if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC |
| 729 | && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC) |
| 730 | return false; |
| 731 | |
| 732 | if (exp == NULL_TREE) |
| 733 | return false; |
| 734 | |
| 735 | /* Functions are never large data. */ |
| 736 | if (TREE_CODE (exp) == FUNCTION_DECL) |
| 737 | return false; |
| 738 | |
| 739 | /* Automatic variables are never large data. */ |
| 740 | if (VAR_P (exp) && !is_global_var (t: exp)) |
| 741 | return false; |
| 742 | |
| 743 | if (VAR_P (exp) && DECL_SECTION_NAME (exp)) |
| 744 | { |
| 745 | const char *section = DECL_SECTION_NAME (exp); |
| 746 | if (strcmp (s1: section, s2: ".ldata" ) == 0 |
| 747 | || strcmp (s1: section, s2: ".lbss" ) == 0) |
| 748 | return true; |
| 749 | return false; |
| 750 | } |
| 751 | else |
| 752 | { |
| 753 | HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); |
| 754 | |
| 755 | /* If this is an incomplete type with size 0, then we can't put it |
| 756 | in data because it might be too big when completed. Also, |
| 757 | int_size_in_bytes returns -1 if size can vary or is larger than |
| 758 | an integer in which case also it is safer to assume that it goes in |
| 759 | large data. */ |
| 760 | if (size <= 0 || size > ix86_section_threshold) |
| 761 | return true; |
| 762 | } |
| 763 | |
| 764 | return false; |
| 765 | } |
| 766 | |
/* i386-specific section flag to mark large sections (reuses the
   target-dependent flag bit).  */
#define SECTION_LARGE SECTION_MACH_DEP
| 769 | |
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (exp: decl))
    {
      const char *sname = NULL;
      /* Large sections default to writable; the BSS and read-only
	 categories adjust FLAGS below.  */
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags &= ~SECTION_WRITE;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories are asserted never to occur here.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  /* Not large data (or an unsplit category): use the generic ELF rules.  */
  return default_elf_select_section (decl, reloc, align);
}
| 834 | |
| 835 | /* Select a set of attributes for section NAME based on the properties |
| 836 | of DECL and whether or not RELOC indicates that DECL's initializer |
| 837 | might contain runtime relocations. */ |
| 838 | |
| 839 | static unsigned int ATTRIBUTE_UNUSED |
| 840 | x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) |
| 841 | { |
| 842 | unsigned int flags = default_section_type_flags (decl, name, reloc); |
| 843 | |
| 844 | if (ix86_in_large_data_p (exp: decl)) |
| 845 | flags |= SECTION_LARGE; |
| 846 | |
| 847 | if (decl == NULL_TREE |
| 848 | && (strcmp (s1: name, s2: ".ldata.rel.ro" ) == 0 |
| 849 | || strcmp (s1: name, s2: ".ldata.rel.ro.local" ) == 0)) |
| 850 | flags |= SECTION_RELRO; |
| 851 | |
| 852 | if (strcmp (s1: name, s2: ".lbss" ) == 0 |
| 853 | || startswith (str: name, prefix: ".lbss." ) |
| 854 | || startswith (str: name, prefix: ".gnu.linkonce.lb." )) |
| 855 | flags |= SECTION_BSS; |
| 856 | |
| 857 | return flags; |
| 858 | } |
| 859 | |
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (exp: decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      /* Pick a large-section prefix matching the decl's category;
	 one-only decls get the short .gnu.linkonce-style prefixes.  */
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories are asserted never to occur here.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  /* Not large data (or an unsplit category): use the generic scheme.  */
  default_unique_section (decl, reloc);
}
| 923 | |
| 924 | /* Return true if TYPE has no_callee_saved_registers or preserve_none |
| 925 | attribute. */ |
| 926 | |
| 927 | bool |
| 928 | ix86_type_no_callee_saved_registers_p (const_tree type) |
| 929 | { |
| 930 | return (lookup_attribute (attr_name: "no_callee_saved_registers" , |
| 931 | TYPE_ATTRIBUTES (type)) != NULL |
| 932 | || lookup_attribute (attr_name: "preserve_none" , |
| 933 | TYPE_ATTRIBUTES (type)) != NULL); |
| 934 | } |
| 935 | |
| 936 | #ifdef COMMON_ASM_OP |
| 937 | |
| 938 | #ifndef LARGECOMM_SECTION_ASM_OP |
| 939 | #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" |
| 940 | #endif |
| 941 | |
| 942 | /* This says how to output assembler code to declare an |
| 943 | uninitialized external linkage data object. |
| 944 | |
| 945 | For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for |
| 946 | large objects. */ |
| 947 | void |
| 948 | x86_elf_aligned_decl_common (FILE *file, tree decl, |
| 949 | const char *name, unsigned HOST_WIDE_INT size, |
| 950 | unsigned align) |
| 951 | { |
| 952 | if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC |
| 953 | || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) |
| 954 | && size > (unsigned int)ix86_section_threshold) |
| 955 | { |
| 956 | switch_to_section (get_named_section (decl, ".lbss" , 0)); |
| 957 | fputs (LARGECOMM_SECTION_ASM_OP, stream: file); |
| 958 | } |
| 959 | else |
| 960 | fputs (COMMON_ASM_OP, stream: file); |
| 961 | assemble_name (file, name); |
| 962 | fprintf (stream: file, format: "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n" , |
| 963 | size, align / BITS_PER_UNIT); |
| 964 | } |
| 965 | #endif |
| 966 | |
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, unsigned align)
{
  /* Under the medium and large code models, objects exceeding the
     section threshold go into .lbss; everything else uses .bss.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve the space; emit at least one byte even for zero-sized
     objects.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
| 990 | |
| 991 | /* Decide whether we must probe the stack before any space allocation |
| 992 | on this target. It's essentially TARGET_STACK_PROBE except when |
| 993 | -fstack-check causes the stack to be already probed differently. */ |
| 994 | |
| 995 | bool |
| 996 | ix86_target_stack_probe (void) |
| 997 | { |
| 998 | /* Do not probe the stack twice if static stack checking is enabled. */ |
| 999 | if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
| 1000 | return false; |
| 1001 | |
| 1002 | return TARGET_STACK_PROBE; |
| 1003 | } |
| 1004 | |
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  Return true if the tail call may
   be turned into a sibcall.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  /* True when DECL is known but does not bind locally, so the call
     may need to go through the PLT or GOT.  */
  bool bind_global = decl && !targetm.binds_local_p (decl);

  /* Never sibcall out of a naked function.  */
  if (ix86_function_naked (fn: current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Sibling call isn't OK if callee has no callee-saved registers
     and the calling function has callee-saved registers.  */
  if ((cfun->machine->call_saved_registers
       != TYPE_NO_CALLEE_SAVED_REGISTERS)
      && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
      && ix86_type_no_callee_saved_registers_p (type))
    return false;

  /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
  if ((OUTGOING_REG_PARM_STACK_SPACE (type)
       != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
      || (REG_PARM_STACK_SPACE (decl_or_type)
	  != REG_PARM_STACK_SPACE (current_function_decl)))
    {
      maybe_complain_about_tail_call (exp,
				      "inconsistent size of stack space"
				      " allocated for arguments which are"
				      " passed in registers");
      return false;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      /* x87 stack registers must match exactly.  */
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions and call to global
	 function via GOT slot are indirect.  */
      if (!decl
	  || (bind_global && flag_pic && !flag_plt)
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
	  || flag_force_indirect_call)
	{
	  /* Check if regparm >= 3 since arg_reg_available is set to
	     false if regparm == 0.  If regparm is 1 or 2, there is
	     always a call-clobbered register available.

	     ??? The symbol indirect call doesn't need a call-clobbered
	     register.  But we don't know if this is a symbol indirect
	     call or not here.  */
	  if (ix86_function_regparm (type, decl) >= 3
	      && !cfun->machine->arg_reg_available)
	    return false;
	}
    }

  if (decl && ix86_use_pseudo_pic_reg ())
    {
      /* When PIC register is used, it must be restored after ifunc
	 function returns.  */
      cgraph_node *node = cgraph_node::get (decl);
      if (node && node->ifunc_resolver)
	return false;
    }

  /* Disable sibcall if callee has indirect_return attribute and
     caller doesn't since callee will return to the caller's caller
     via an indirect jump.  */
  if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
       == (CF_RETURN | CF_BRANCH))
      && lookup_attribute (attr_name: "indirect_return", TYPE_ATTRIBUTES (type))
      && !lookup_attribute (attr_name: "indirect_return",
			    TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
| 1153 | |
| 1154 | /* This function determines from TYPE the calling-convention. */ |
| 1155 | |
| 1156 | unsigned int |
| 1157 | ix86_get_callcvt (const_tree type) |
| 1158 | { |
| 1159 | unsigned int ret = 0; |
| 1160 | bool is_stdarg; |
| 1161 | tree attrs; |
| 1162 | |
| 1163 | if (TARGET_64BIT) |
| 1164 | return IX86_CALLCVT_CDECL; |
| 1165 | |
| 1166 | attrs = TYPE_ATTRIBUTES (type); |
| 1167 | if (attrs != NULL_TREE) |
| 1168 | { |
| 1169 | if (lookup_attribute (attr_name: "cdecl" , list: attrs)) |
| 1170 | ret |= IX86_CALLCVT_CDECL; |
| 1171 | else if (lookup_attribute (attr_name: "stdcall" , list: attrs)) |
| 1172 | ret |= IX86_CALLCVT_STDCALL; |
| 1173 | else if (lookup_attribute (attr_name: "fastcall" , list: attrs)) |
| 1174 | ret |= IX86_CALLCVT_FASTCALL; |
| 1175 | else if (lookup_attribute (attr_name: "thiscall" , list: attrs)) |
| 1176 | ret |= IX86_CALLCVT_THISCALL; |
| 1177 | |
| 1178 | /* Regparam isn't allowed for thiscall and fastcall. */ |
| 1179 | if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) |
| 1180 | { |
| 1181 | if (lookup_attribute (attr_name: "regparm" , list: attrs)) |
| 1182 | ret |= IX86_CALLCVT_REGPARM; |
| 1183 | if (lookup_attribute (attr_name: "sseregparm" , list: attrs)) |
| 1184 | ret |= IX86_CALLCVT_SSEREGPARM; |
| 1185 | } |
| 1186 | |
| 1187 | if (IX86_BASE_CALLCVT(ret) != 0) |
| 1188 | return ret; |
| 1189 | } |
| 1190 | |
| 1191 | is_stdarg = stdarg_p (type); |
| 1192 | if (TARGET_RTD && !is_stdarg) |
| 1193 | return IX86_CALLCVT_STDCALL | ret; |
| 1194 | |
| 1195 | if (ret != 0 |
| 1196 | || is_stdarg |
| 1197 | || TREE_CODE (type) != METHOD_TYPE |
| 1198 | || ix86_function_type_abi (type) != MS_ABI) |
| 1199 | return IX86_CALLCVT_CDECL | ret; |
| 1200 | |
| 1201 | return IX86_CALLCVT_THISCALL; |
| 1202 | } |
| 1203 | |
| 1204 | /* Return 0 if the attributes for two types are incompatible, 1 if they |
| 1205 | are compatible, and 2 if they are nearly compatible (which causes a |
| 1206 | warning to be generated). */ |
| 1207 | |
| 1208 | static int |
| 1209 | ix86_comp_type_attributes (const_tree type1, const_tree type2) |
| 1210 | { |
| 1211 | unsigned int ccvt1, ccvt2; |
| 1212 | |
| 1213 | if (TREE_CODE (type1) != FUNCTION_TYPE |
| 1214 | && TREE_CODE (type1) != METHOD_TYPE) |
| 1215 | return 1; |
| 1216 | |
| 1217 | ccvt1 = ix86_get_callcvt (type: type1); |
| 1218 | ccvt2 = ix86_get_callcvt (type: type2); |
| 1219 | if (ccvt1 != ccvt2) |
| 1220 | return 0; |
| 1221 | if (ix86_function_regparm (type1, NULL) |
| 1222 | != ix86_function_regparm (type2, NULL)) |
| 1223 | return 0; |
| 1224 | |
| 1225 | if (ix86_type_no_callee_saved_registers_p (type: type1) |
| 1226 | != ix86_type_no_callee_saved_registers_p (type: type2)) |
| 1227 | return 0; |
| 1228 | |
| 1229 | /* preserve_none attribute uses a different calling convention is |
| 1230 | only for 64-bit. */ |
| 1231 | if (TARGET_64BIT |
| 1232 | && (lookup_attribute (attr_name: "preserve_none" , TYPE_ATTRIBUTES (type1)) |
| 1233 | != lookup_attribute (attr_name: "preserve_none" , |
| 1234 | TYPE_ATTRIBUTES (type2)))) |
| 1235 | return 0; |
| 1236 | |
| 1237 | return 1; |
| 1238 | } |
| 1239 | |
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  /* The 64-bit ABIs have fixed register-argument counts.  */
  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      /* An explicit regparm(N) attribute is decisive.  */
      attr = lookup_attribute (attr_name: "regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
	target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
	 checking here just optimize means that with
	 __attribute__((optimize (...))) caller could use regparm convention
	 and callee not, or vice versa.  Instead look at whether the callee
	 is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
	  && !(profile_flag && !flag_fentry))
	{
	  if (target->local && target->can_change_signature)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a
		 fixed register variable.  */
	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
		   local_regparm++)
		if (fixed_regs[local_regparm])
		  break;

	      /* We don't want to use regparm(3) for nested functions as
		 these use a static chain pointer in the third argument.  */
	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
		local_regparm = 2;

	      /* Save a register for the split stack.  */
	      if (flag_split_stack)
		{
		  if (local_regparm == 3)
		    local_regparm = 2;
		  else if (local_regparm == 2
			   && DECL_STATIC_CHAIN (target->decl))
		    local_regparm = 1;
		}

	      /* Each fixed register usage increases register pressure,
		 so less registers should be used for argument passing.
		 This functionality can be overridden by an explicit
		 regparm value.  */
	      for (regno = AX_REG; regno <= DI_REG; regno++)
		if (fixed_regs[regno])
		  globals++;

	      /* Shrink the local regparm count by the number of fixed
		 registers among the candidates.  */
	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }

  return regparm;
}
| 1332 | |
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by error.  This is used in situations where we
   imply the SSE calling convention but the function is called from
   another function with SSE disabled.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute (attr_name: "sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  /* sseregparm without SSE cannot work; diagnose if asked to.  */
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (fndecl: target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
	{
	  /* Refuse to produce wrong code when local function with SSE enabled
	     is called from SSE disabled function.
	     FIXME: We need a way to detect these cases cross-ltrans partition
	     and avoid using SSE calling conventions on local functions called
	     from function with SSE disabled.  For now at least delay the
	     warning until we know we are going to produce wrong code.
	     See PR66047.  */
	  if (!TARGET_SSE && warn)
	    return -1;
	  /* 2 when the callee was compiled with SSE2, else 1.  */
	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
				->x_ix86_isa_flags) ? 2 : 1;
	}
    }

  return 0;
}
| 1401 | |
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  /* Hard register number 0 is %eax on this target.  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}
| 1417 | |
| 1418 | static bool |
| 1419 | ix86_keep_aggregate_return_pointer (tree fntype) |
| 1420 | { |
| 1421 | tree attr; |
| 1422 | |
| 1423 | if (!TARGET_64BIT) |
| 1424 | { |
| 1425 | attr = lookup_attribute (attr_name: "callee_pop_aggregate_return" , |
| 1426 | TYPE_ATTRIBUTES (fntype)); |
| 1427 | if (attr) |
| 1428 | return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); |
| 1429 | |
| 1430 | /* For 32-bit MS-ABI the default is to keep aggregate |
| 1431 | return pointer. */ |
| 1432 | if (ix86_function_type_abi (fntype) == MS_ABI) |
| 1433 | return true; |
| 1434 | } |
| 1435 | return KEEP_AGGREGATE_RETURN_POINTER != 0; |
| 1436 | } |
| 1437 | |
| 1438 | /* Value is the number of bytes of arguments automatically |
| 1439 | popped when returning from a subroutine call. |
| 1440 | FUNDECL is the declaration node of the function (as a tree), |
| 1441 | FUNTYPE is the data type of the function (as a tree), |
| 1442 | or for a library call it is an identifier node for the subroutine name. |
| 1443 | SIZE is the number of bytes of arguments passed on the stack. |
| 1444 | |
| 1445 | On the 80386, the RTD insn may be used to pop them if the number |
| 1446 | of args is fixed, but if the number is variable then the caller |
| 1447 | must pop them all. RTD can't be used for library calls now |
| 1448 | because the library is compiled with the Unix compiler. |
| 1449 | Use of RTD is a selectable option, since it is incompatible with |
| 1450 | standard Unix calling sequences. If the option is not selected, |
| 1451 | the caller must always pop the args. |
| 1452 | |
| 1453 | The attribute stdcall is equivalent to RTD on a per module basis. */ |
| 1454 | |
| 1455 | static poly_int64 |
| 1456 | ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size) |
| 1457 | { |
| 1458 | unsigned int ccvt; |
| 1459 | |
| 1460 | /* None of the 64-bit ABIs pop arguments. */ |
| 1461 | if (TARGET_64BIT) |
| 1462 | return 0; |
| 1463 | |
| 1464 | ccvt = ix86_get_callcvt (type: funtype); |
| 1465 | |
| 1466 | if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL |
| 1467 | | IX86_CALLCVT_THISCALL)) != 0 |
| 1468 | && ! stdarg_p (funtype)) |
| 1469 | return size; |
| 1470 | |
| 1471 | /* Lose any fake structure return argument if it is passed on the stack. */ |
| 1472 | if (aggregate_value_p (TREE_TYPE (funtype), fundecl) |
| 1473 | && !ix86_keep_aggregate_return_pointer (fntype: funtype)) |
| 1474 | { |
| 1475 | int nregs = ix86_function_regparm (type: funtype, decl: fundecl); |
| 1476 | if (nregs == 0) |
| 1477 | return GET_MODE_SIZE (Pmode); |
| 1478 | } |
| 1479 | |
| 1480 | return 0; |
| 1481 | } |
| 1482 | |
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  Return false if
   a combined insn has a hard-register operand that satisfies none of
   the enabled alternatives' constraints.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
	 is irrelevant for matching constraints.  */
      if (UNARY_P (op))
	op = XEXP (op, 0);

      if (SUBREG_P (op))
	{
	  /* For a subreg of a hard register, compute the register
	     offset selected within the inner register.  */
	  if (REG_P (SUBREG_REG (op))
	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					  GET_MODE (SUBREG_REG (op)),
					  SUBREG_BYTE (op),
					  GET_MODE (op));
	  op = SUBREG_REG (op);
	}

      /* Only hard-register operands need checking.  */
      if (!(REG_P (op) && HARD_REGISTER_P (op)))
	continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      /* Try each enabled alternative until one accepts the operand.  */
      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	{
	  if (!TEST_BIT (preferred, j))
	    continue;
	  if (op_alt[i].anything_ok
	      || (op_alt[i].matches != -1
		  && operands_match_p
		  (recog_data.operand[i],
		   recog_data.operand[op_alt[i].matches]))
	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
	    {
	      win = true;
	      break;
	    }
	}

      /* No alternative accepts this hard register operand.  */
      if (!win)
	return false;
    }

  return true;
}
| 1559 | |
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  /* The shadow offset is supplied by the subtarget configuration.  */
  return SUBTARGET_SHADOW_OFFSET;
}
| 1567 | |
| 1568 | /* Argument support functions. */ |
| 1569 | |
/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  /* SSE argument registers, common to 32- and 64-bit targets.  */
  if (TARGET_SSE && SSE_REGNO_P (regno)
      && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    return true;

  if (!TARGET_64BIT)
    /* 32-bit: the low integer registers plus MMX argument registers.  */
    return (regno < REGPARM_MAX
	    || (TARGET_MMX && MMX_REGNO_P (regno)
		&& regno < FIRST_MMX_REG + MMX_REGPARM_MAX));

  /* TODO: The function should depend on current function ABI but
     builtins.cc would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  /* Select the integer parameter register table for the active ABI.  */
  if (cfun
      && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = 0; i < (call_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
| 1610 | |
/* Return true if we do not know how to pass ARG solely in registers. */
| 1612 | |
| 1613 | static bool |
| 1614 | ix86_must_pass_in_stack (const function_arg_info &arg) |
| 1615 | { |
| 1616 | if (must_pass_in_stack_var_size_or_pad (arg)) |
| 1617 | return true; |
| 1618 | |
| 1619 | /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! |
| 1620 | The layout_type routine is crafty and tries to trick us into passing |
| 1621 | currently unsupported vector types on the stack by using TImode. */ |
| 1622 | return (!TARGET_64BIT && arg.mode == TImode |
| 1623 | && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE); |
| 1624 | } |
| 1625 | |
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers to the function represented by FNDECL, depending on the
   calling ABI in use. */
| 1629 | int |
| 1630 | ix86_reg_parm_stack_space (const_tree fndecl) |
| 1631 | { |
| 1632 | enum calling_abi call_abi = SYSV_ABI; |
| 1633 | if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) |
| 1634 | call_abi = ix86_function_abi (fndecl); |
| 1635 | else |
| 1636 | call_abi = ix86_function_type_abi (fndecl); |
| 1637 | if (TARGET_64BIT && call_abi == MS_ABI) |
| 1638 | return 32; |
| 1639 | return 0; |
| 1640 | } |
| 1641 | |
| 1642 | /* We add this as a workaround in order to use libc_has_function |
| 1643 | hook in i386.md. */ |
bool
ix86_libc_has_function (enum function_class fn_class)
{
  /* Plain forwarding wrapper: i386.md cannot call the target hook
     directly, so this symbol exists only to expose it.  */
  return targetm.libc_has_function (fn_class, NULL_TREE);
}
| 1649 | |
| 1650 | /* Returns value SYSV_ABI, MS_ABI dependent on fntype, |
| 1651 | specifying the call abi used. */ |
| 1652 | enum calling_abi |
| 1653 | ix86_function_type_abi (const_tree fntype) |
| 1654 | { |
| 1655 | enum calling_abi abi = ix86_abi; |
| 1656 | |
| 1657 | if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) |
| 1658 | return abi; |
| 1659 | |
| 1660 | if (abi == SYSV_ABI |
| 1661 | && lookup_attribute (attr_name: "ms_abi" , TYPE_ATTRIBUTES (fntype))) |
| 1662 | { |
| 1663 | static int warned; |
| 1664 | if (TARGET_X32 && !warned) |
| 1665 | { |
| 1666 | error ("X32 does not support %<ms_abi%> attribute" ); |
| 1667 | warned = 1; |
| 1668 | } |
| 1669 | |
| 1670 | abi = MS_ABI; |
| 1671 | } |
| 1672 | else if (abi == MS_ABI |
| 1673 | && lookup_attribute (attr_name: "sysv_abi" , TYPE_ATTRIBUTES (fntype))) |
| 1674 | abi = SYSV_ABI; |
| 1675 | |
| 1676 | return abi; |
| 1677 | } |
| 1678 | |
| 1679 | enum calling_abi |
| 1680 | ix86_function_abi (const_tree fndecl) |
| 1681 | { |
| 1682 | return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; |
| 1683 | } |
| 1684 | |
| 1685 | /* Returns value SYSV_ABI, MS_ABI dependent on cfun, |
| 1686 | specifying the call abi used. */ |
| 1687 | enum calling_abi |
| 1688 | ix86_cfun_abi (void) |
| 1689 | { |
| 1690 | return cfun ? cfun->machine->call_abi : ix86_abi; |
| 1691 | } |
| 1692 | |
| 1693 | bool |
| 1694 | ix86_function_ms_hook_prologue (const_tree fn) |
| 1695 | { |
| 1696 | if (fn && lookup_attribute (attr_name: "ms_hook_prologue" , DECL_ATTRIBUTES (fn))) |
| 1697 | { |
| 1698 | if (decl_function_context (fn) != NULL_TREE) |
| 1699 | error_at (DECL_SOURCE_LOCATION (fn), |
| 1700 | "%<ms_hook_prologue%> attribute is not compatible " |
| 1701 | "with nested function" ); |
| 1702 | else |
| 1703 | return true; |
| 1704 | } |
| 1705 | return false; |
| 1706 | } |
| 1707 | |
| 1708 | bool |
| 1709 | ix86_function_naked (const_tree fn) |
| 1710 | { |
| 1711 | if (fn && lookup_attribute (attr_name: "naked" , DECL_ATTRIBUTES (fn))) |
| 1712 | return true; |
| 1713 | |
| 1714 | return false; |
| 1715 | } |
| 1716 | |
| 1717 | /* Write the extra assembler code needed to declare a function properly. */ |
| 1718 | |
| 1719 | void |
| 1720 | ix86_asm_output_function_label (FILE *out_file, const char *fname, |
| 1721 | tree decl) |
| 1722 | { |
| 1723 | bool is_ms_hook = ix86_function_ms_hook_prologue (fn: decl); |
| 1724 | |
| 1725 | if (cfun) |
| 1726 | cfun->machine->function_label_emitted = true; |
| 1727 | |
| 1728 | if (is_ms_hook) |
| 1729 | { |
| 1730 | int i, filler_count = (TARGET_64BIT ? 32 : 16); |
| 1731 | unsigned int filler_cc = 0xcccccccc; |
| 1732 | |
| 1733 | for (i = 0; i < filler_count; i += 4) |
| 1734 | fprintf (stream: out_file, ASM_LONG " %#x\n" , filler_cc); |
| 1735 | } |
| 1736 | |
| 1737 | #ifdef SUBTARGET_ASM_UNWIND_INIT |
| 1738 | SUBTARGET_ASM_UNWIND_INIT (out_file); |
| 1739 | #endif |
| 1740 | |
| 1741 | assemble_function_label_raw (out_file, fname); |
| 1742 | |
| 1743 | /* Output magic byte marker, if hot-patch attribute is set. */ |
| 1744 | if (is_ms_hook) |
| 1745 | { |
| 1746 | if (TARGET_64BIT) |
| 1747 | { |
| 1748 | /* leaq [%rsp + 0], %rsp */ |
| 1749 | fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n" , |
| 1750 | stream: out_file); |
| 1751 | } |
| 1752 | else |
| 1753 | { |
| 1754 | /* movl.s %edi, %edi |
| 1755 | push %ebp |
| 1756 | movl.s %esp, %ebp */ |
| 1757 | fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n" , stream: out_file); |
| 1758 | } |
| 1759 | } |
| 1760 | } |
| 1761 | |
| 1762 | /* Output a user-defined label. In AT&T syntax, registers are prefixed |
| 1763 | with %, so labels require no punctuation. In Intel syntax, registers |
| 1764 | are unprefixed, so labels may clash with registers or other operators, |
| 1765 | and require quoting. */ |
| 1766 | void |
| 1767 | ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label) |
| 1768 | { |
| 1769 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 1770 | fprintf (stream: file, format: "%s%s" , prefix, label); |
| 1771 | else |
| 1772 | fprintf (stream: file, format: "\"%s%s\"" , prefix, label); |
| 1773 | } |
| 1774 | |
| 1775 | /* Implementation of call abi switching target hook. Specific to FNDECL |
| 1776 | the specific call register sets are set. See also |
| 1777 | ix86_conditional_register_usage for more details. */ |
void
ix86_call_abi_override (const_tree fndecl)
{
  /* Cache FNDECL's ABI in the per-function machine data, where
     ix86_cfun_abi and the register-usage code read it back.  */
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
| 1783 | |
| 1784 | /* Return 1 if pseudo register should be created and used to hold |
| 1785 | GOT address for PIC code. */ |
| 1786 | bool |
| 1787 | ix86_use_pseudo_pic_reg (void) |
| 1788 | { |
| 1789 | if ((TARGET_64BIT |
| 1790 | && (ix86_cmodel == CM_SMALL_PIC |
| 1791 | || TARGET_PECOFF)) |
| 1792 | || !flag_pic) |
| 1793 | return false; |
| 1794 | return true; |
| 1795 | } |
| 1796 | |
| 1797 | /* Initialize large model PIC register. */ |
| 1798 | |
static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  /* The large PIC model is only meaningful in 64-bit mode.  */
  gcc_assert (Pmode == DImode);
  /* Emit a local label at the current point; it must survive until
     final, so mark it preserved.  */
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  /* TMP_REGNO must be distinct from the PIC register, since both are
     live across the three-insn sequence below.  */
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  /* Materialize the label's address in the PIC register, then add the
     GOT displacement computed relative to that label (see the
     set_rip_rex64 / set_got_offset_rex64 patterns in i386.md).  */
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
				label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* Demote the label to a deleted-label note, keeping its name for
     debug/assembly output; the address reference above remains valid.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
| 1820 | |
| 1821 | /* Create and initialize PIC register if required. */ |
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  /* Nothing to do unless a pseudo PIC register is in use.  */
  if (!ix86_use_pseudo_pic_reg ())
    return;

  /* Build the initialization sequence off to the side, then splice it
     onto the function's entry edge.  */
  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
	ix86_init_large_pic_reg (R11_REG);
      else
	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /* If there is future mcount call in the function it is more profitable
	 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
		: pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* When the hard register was used, copy it into the pseudo for
	 the rest of the function.  */
      if (crtl->profile)
	emit_move_insn (pic_offset_table_rtx, reg);
      /* set_got clobbers the stack transiently; flush queued CFA
	 adjustments so unwind info stays correct.  */
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = end_sequence ();

  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (e: entry_edge);
}
| 1860 | |
| 1861 | /* Initialize a variable CUM of type CUMULATIVE_ARGS |
| 1862 | for a call to a function whose data type is FNTYPE. |
| 1863 | For a library call, FNTYPE is 0. */ |
| 1864 | |
void
init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
		      tree fntype, /* tree ptr for function decl */
		      rtx libname, /* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller) /* nonzero when initializing for a call site,
				     zero for an incoming-arguments scan */
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  /* Set silent_p to false to raise an error for invalid calls when
     expanding function body.  */
  cfun->machine->silent_p = false;

  memset (s: cum, c: 0, n: sizeof (*cum));

  /* Determine the callee's ABI and the type carrying a possible
     preserve_none attribute.  When a cgraph node exists, resolve to the
     ultimate function symbol so aliases point at the real callee.  */
  tree preserve_none_type;
  if (fndecl)
    {
      target = cgraph_node::get (decl: fndecl);
      if (target)
	{
	  target = target->function_symbol ();
	  local_info_node = cgraph_node::local_info_node (decl: target->decl);
	  cum->call_abi = ix86_function_abi (fndecl: target->decl);
	  preserve_none_type = TREE_TYPE (target->decl);
	}
      else
	{
	  cum->call_abi = ix86_function_abi (fndecl);
	  preserve_none_type = TREE_TYPE (fndecl);
	}
    }
  else
    {
      cum->call_abi = ix86_function_type_abi (fntype);
      preserve_none_type = fntype;
    }
  cum->preserve_none_abi
    = (preserve_none_type
       && (lookup_attribute (attr_name: "preserve_none" ,
			     TYPE_ATTRIBUTES (preserve_none_type))
	   != nullptr));

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  /* Enable the one-shot -Wpsabi warnings that type_natural_mode
     consults when a vector argument needs a disabled ISA.  */
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once type system is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  cum->decl = fndecl;

  /* Empty-struct-argument ABI warning: only relevant with -Wabi for
     prototyped, non-stdarg functions where an empty type is followed by
     a non-empty one.  */
  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
	{
	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
	    break;
	  if (TYPE_EMPTY_P (argtype))
	    seen_empty_type = true;
	  else if (seen_empty_type)
	    {
	      cum->warn_empty = true;
	      break;
	    }
	}
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  /* Since in 32-bit, variable arguments are always passed on
	     stack, there is scratch register available for indirect
	     sibcall.  */
	  cfun->machine->arg_reg_available = true;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx512f = false;
	  cum->warn_avx = false;
	  cum->warn_sse = false;
	  cum->warn_mmx = false;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (type: fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (type: fntype, decl: fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (type: fntype, decl: fndecl, warn: true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
| 2018 | |
| 2019 | /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. |
| 2020 | But in the case of vector types, it is some vector mode. |
| 2021 | |
| 2022 | When we have only some of our vector isa extensions enabled, then there |
| 2023 | are some modes for which vector_mode_supported_p is false. For these |
| 2024 | modes, the generic vector support in gcc will choose some non-vector mode |
| 2025 | in order to implement the type. By computing the natural mode, we'll |
| 2026 | select the proper ABI location for the operand and not depend on whatever |
| 2027 | the middle-end decides to do with these vector types. |
| 2028 | |
   The middle-end can't deal with vector types > 16 bytes. In this
| 2030 | case, we return the original mode and warn ABI change if CUM isn't |
| 2031 | NULL. |
| 2032 | |
| 2033 | If INT_RETURN is true, warn ABI change if the vector mode isn't |
| 2034 | available for function return value. */ |
| 2035 | |
static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
		   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  /* Only intervene when the middle-end gave a vector type a non-vector
     mode; otherwise TYPE_MODE is already the ABI-relevant mode.  */
  if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (node: type) > 1)
	{
	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  /* There are no XFmode vector modes ...  */
	  if (innermode == XFmode)
	    return mode;

	  /* ... and no decimal float vector modes.  */
	  if (DECIMAL_FLOAT_MODE_P (innermode))
	    return mode;

	  /* Start the search at the first vector mode of the matching
	     element kind.  */
	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  FOR_EACH_MODE_FROM (mode, mode)
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (node: type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		/* If the natural vector mode needs an ISA that is not
		   enabled, warn once (separately for arguments and for
		   return values) that the ABI differs from a compilation
		   with that ISA enabled.  The 64- and 32-byte cases keep
		   TYPE_MODE; the SSE/MMX cases fall through and still
		   return the vector mode.  */
		if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
		  {
		    static bool warnedavx512f;
		    static bool warnedavx512f_ret;

		    if (cum && cum->warn_avx512f && !warnedavx512f)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector argument "
				     "without AVX512F enabled changes the ABI" ))
			  warnedavx512f = true;
		      }
		    else if (in_return && !warnedavx512f_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector return "
				     "without AVX512F enabled changes the ABI" ))
			  warnedavx512f_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
		  {
		    static bool warnedavx;
		    static bool warnedavx_ret;

		    if (cum && cum->warn_avx && !warnedavx)
		      {
			if (warning (OPT_Wpsabi, "AVX vector argument "
				     "without AVX enabled changes the ABI" ))
			  warnedavx = true;
		      }
		    else if (in_return && !warnedavx_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX vector return "
				     "without AVX enabled changes the ABI" ))
			  warnedavx_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (((size == 8 && TARGET_64BIT) || size == 16)
			 && !TARGET_SSE
			 && !TARGET_IAMCU)
		  {
		    static bool warnedsse;
		    static bool warnedsse_ret;

		    if (cum && cum->warn_sse && !warnedsse)
		      {
			if (warning (OPT_Wpsabi, "SSE vector argument "
				     "without SSE enabled changes the ABI" ))
			  warnedsse = true;
		      }
		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
		      {
			if (warning (OPT_Wpsabi, "SSE vector return "
				     "without SSE enabled changes the ABI" ))
			  warnedsse_ret = true;
		      }
		  }
		else if ((size == 8 && !TARGET_64BIT)
			 && (!cfun
			     || cfun->machine->func_type == TYPE_NORMAL)
			 && !TARGET_MMX
			 && !TARGET_IAMCU)
		  {
		    static bool warnedmmx;
		    static bool warnedmmx_ret;

		    if (cum && cum->warn_mmx && !warnedmmx)
		      {
			if (warning (OPT_Wpsabi, "MMX vector argument "
				     "without MMX enabled changes the ABI" ))
			  warnedmmx = true;
		      }
		    else if (in_return && !warnedmmx_ret)
		      {
			if (warning (OPT_Wpsabi, "MMX vector return "
				     "without MMX enabled changes the ABI" ))
			  warnedmmx_ret = true;
		      }
		  }
		return mode;
	      }

	  /* A matching vector mode must exist for the supported sizes.  */
	  gcc_unreachable ();
	}
    }

  return mode;
}
| 2160 | |
| 2161 | /* We want to pass a value in REGNO whose "natural" mode is MODE. However, |
| 2162 | this may not agree with the mode that the type system has chosen for the |
| 2163 | register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can |
| 2164 | go ahead and use it. Otherwise we have to build a PARALLEL instead. */ |
| 2165 | |
| 2166 | static rtx |
| 2167 | gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, |
| 2168 | unsigned int regno) |
| 2169 | { |
| 2170 | rtx tmp; |
| 2171 | |
| 2172 | if (orig_mode != BLKmode) |
| 2173 | tmp = gen_rtx_REG (orig_mode, regno); |
| 2174 | else |
| 2175 | { |
| 2176 | tmp = gen_rtx_REG (mode, regno); |
| 2177 | tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); |
| 2178 | tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); |
| 2179 | } |
| 2180 | |
| 2181 | return tmp; |
| 2182 | } |
| 2183 | |
| 2184 | /* x86-64 register passing implementation. See x86-64 ABI for details. Goal |
| 2185 | of this code is to classify each 8bytes of incoming argument by the register |
| 2186 | class and assign registers accordingly. */ |
| 2187 | |
| 2188 | /* Return the union class of CLASS1 and CLASS2. |
| 2189 | See the x86-64 PS ABI for details. */ |
| 2190 | |
| 2191 | static enum x86_64_reg_class |
| 2192 | merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) |
| 2193 | { |
| 2194 | /* Rule #1: If both classes are equal, this is the resulting class. */ |
| 2195 | if (class1 == class2) |
| 2196 | return class1; |
| 2197 | |
| 2198 | /* Rule #2: If one of the classes is NO_CLASS, the resulting class is |
| 2199 | the other class. */ |
| 2200 | if (class1 == X86_64_NO_CLASS) |
| 2201 | return class2; |
| 2202 | if (class2 == X86_64_NO_CLASS) |
| 2203 | return class1; |
| 2204 | |
| 2205 | /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ |
| 2206 | if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) |
| 2207 | return X86_64_MEMORY_CLASS; |
| 2208 | |
| 2209 | /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ |
| 2210 | if ((class1 == X86_64_INTEGERSI_CLASS |
| 2211 | && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS)) |
| 2212 | || (class2 == X86_64_INTEGERSI_CLASS |
| 2213 | && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS))) |
| 2214 | return X86_64_INTEGERSI_CLASS; |
| 2215 | if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS |
| 2216 | || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) |
| 2217 | return X86_64_INTEGER_CLASS; |
| 2218 | |
| 2219 | /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, |
| 2220 | MEMORY is used. */ |
| 2221 | if (class1 == X86_64_X87_CLASS |
| 2222 | || class1 == X86_64_X87UP_CLASS |
| 2223 | || class1 == X86_64_COMPLEX_X87_CLASS |
| 2224 | || class2 == X86_64_X87_CLASS |
| 2225 | || class2 == X86_64_X87UP_CLASS |
| 2226 | || class2 == X86_64_COMPLEX_X87_CLASS) |
| 2227 | return X86_64_MEMORY_CLASS; |
| 2228 | |
| 2229 | /* Rule #6: Otherwise class SSE is used. */ |
| 2230 | return X86_64_SSE_CLASS; |
| 2231 | } |
| 2232 | |
| 2233 | /* Classify the argument of type TYPE and mode MODE. |
| 2234 | CLASSES will be filled by the register class used to pass each word |
| 2235 | of the operand. The number of words is returned. In case the parameter |
| 2236 | should be passed in memory, 0 is returned. As a special case for zero |
| 2237 | sized containers, classes[0] will be NO_CLASS and 1 is returned. |
| 2238 | |
| 2239 | BIT_OFFSET is used internally for handling records and specifies offset |
| 2240 | of the offset in bits modulo 512 to avoid overflow cases. |
| 2241 | |
| 2242 | See the x86-64 PS ABI for details. |
| 2243 | */ |
| 2244 | |
| 2245 | static int |
| 2246 | classify_argument (machine_mode mode, const_tree type, |
| 2247 | enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset, |
| 2248 | int &zero_width_bitfields) |
| 2249 | { |
| 2250 | HOST_WIDE_INT bytes |
| 2251 | = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| 2252 | int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); |
| 2253 | |
| 2254 | /* Variable sized entities are always passed/returned in memory. */ |
| 2255 | if (bytes < 0) |
| 2256 | return 0; |
| 2257 | |
| 2258 | if (mode != VOIDmode) |
| 2259 | { |
| 2260 | /* The value of "named" doesn't matter. */ |
| 2261 | function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true); |
| 2262 | if (targetm.calls.must_pass_in_stack (arg)) |
| 2263 | return 0; |
| 2264 | } |
| 2265 | |
| 2266 | if (type && (AGGREGATE_TYPE_P (type) |
| 2267 | || (TREE_CODE (type) == BITINT_TYPE && words > 1))) |
| 2268 | { |
| 2269 | int i; |
| 2270 | tree field; |
| 2271 | enum x86_64_reg_class subclasses[MAX_CLASSES]; |
| 2272 | |
| 2273 | /* On x86-64 we pass structures larger than 64 bytes on the stack. */ |
| 2274 | if (bytes > 64) |
| 2275 | return 0; |
| 2276 | |
| 2277 | for (i = 0; i < words; i++) |
| 2278 | classes[i] = X86_64_NO_CLASS; |
| 2279 | |
| 2280 | /* Zero sized arrays or structures are NO_CLASS. We return 0 to |
| 2281 | signalize memory class, so handle it as special case. */ |
| 2282 | if (!words) |
| 2283 | { |
| 2284 | classes[0] = X86_64_NO_CLASS; |
| 2285 | return 1; |
| 2286 | } |
| 2287 | |
| 2288 | /* Classify each field of record and merge classes. */ |
| 2289 | switch (TREE_CODE (type)) |
| 2290 | { |
| 2291 | case RECORD_TYPE: |
| 2292 | /* And now merge the fields of structure. */ |
| 2293 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| 2294 | { |
| 2295 | if (TREE_CODE (field) == FIELD_DECL) |
| 2296 | { |
| 2297 | int num; |
| 2298 | |
| 2299 | if (TREE_TYPE (field) == error_mark_node) |
| 2300 | continue; |
| 2301 | |
| 2302 | /* Bitfields are always classified as integer. Handle them |
| 2303 | early, since later code would consider them to be |
| 2304 | misaligned integers. */ |
| 2305 | if (DECL_BIT_FIELD (field)) |
| 2306 | { |
| 2307 | if (integer_zerop (DECL_SIZE (field))) |
| 2308 | { |
| 2309 | if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field)) |
| 2310 | continue; |
| 2311 | if (zero_width_bitfields != 2) |
| 2312 | { |
| 2313 | zero_width_bitfields = 1; |
| 2314 | continue; |
| 2315 | } |
| 2316 | } |
| 2317 | for (i = (int_bit_position (field) |
| 2318 | + (bit_offset % 64)) / 8 / 8; |
| 2319 | i < ((int_bit_position (field) + (bit_offset % 64)) |
| 2320 | + tree_to_shwi (DECL_SIZE (field)) |
| 2321 | + 63) / 8 / 8; i++) |
| 2322 | classes[i] |
| 2323 | = merge_classes (class1: X86_64_INTEGER_CLASS, class2: classes[i]); |
| 2324 | } |
| 2325 | else |
| 2326 | { |
| 2327 | int pos; |
| 2328 | |
| 2329 | type = TREE_TYPE (field); |
| 2330 | |
| 2331 | /* Flexible array member is ignored. */ |
| 2332 | if (TYPE_MODE (type) == BLKmode |
| 2333 | && TREE_CODE (type) == ARRAY_TYPE |
| 2334 | && TYPE_SIZE (type) == NULL_TREE |
| 2335 | && TYPE_DOMAIN (type) != NULL_TREE |
| 2336 | && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) |
| 2337 | == NULL_TREE)) |
| 2338 | { |
| 2339 | static bool warned; |
| 2340 | |
| 2341 | if (!warned && warn_psabi) |
| 2342 | { |
| 2343 | warned = true; |
| 2344 | inform (input_location, |
| 2345 | "the ABI of passing struct with" |
| 2346 | " a flexible array member has" |
| 2347 | " changed in GCC 4.4" ); |
| 2348 | } |
| 2349 | continue; |
| 2350 | } |
| 2351 | num = classify_argument (TYPE_MODE (type), type, |
| 2352 | classes: subclasses, |
| 2353 | bit_offset: (int_bit_position (field) |
| 2354 | + bit_offset) % 512, |
| 2355 | zero_width_bitfields); |
| 2356 | if (!num) |
| 2357 | return 0; |
| 2358 | pos = (int_bit_position (field) |
| 2359 | + (bit_offset % 64)) / 8 / 8; |
| 2360 | for (i = 0; i < num && (i + pos) < words; i++) |
| 2361 | classes[i + pos] |
| 2362 | = merge_classes (class1: subclasses[i], class2: classes[i + pos]); |
| 2363 | } |
| 2364 | } |
| 2365 | } |
| 2366 | break; |
| 2367 | |
| 2368 | case ARRAY_TYPE: |
| 2369 | /* Arrays are handled as small records. */ |
| 2370 | { |
| 2371 | int num; |
| 2372 | num = classify_argument (TYPE_MODE (TREE_TYPE (type)), |
| 2373 | TREE_TYPE (type), classes: subclasses, bit_offset, |
| 2374 | zero_width_bitfields); |
| 2375 | if (!num) |
| 2376 | return 0; |
| 2377 | |
| 2378 | /* The partial classes are now full classes. */ |
| 2379 | if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) |
| 2380 | subclasses[0] = X86_64_SSE_CLASS; |
| 2381 | if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2) |
| 2382 | subclasses[0] = X86_64_SSE_CLASS; |
| 2383 | if (subclasses[0] == X86_64_INTEGERSI_CLASS |
| 2384 | && !((bit_offset % 64) == 0 && bytes == 4)) |
| 2385 | subclasses[0] = X86_64_INTEGER_CLASS; |
| 2386 | |
| 2387 | for (i = 0; i < words; i++) |
| 2388 | classes[i] = subclasses[i % num]; |
| 2389 | |
| 2390 | break; |
| 2391 | } |
| 2392 | case UNION_TYPE: |
| 2393 | case QUAL_UNION_TYPE: |
| 2394 | /* Unions are similar to RECORD_TYPE but offset is always 0. |
| 2395 | */ |
| 2396 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| 2397 | { |
| 2398 | if (TREE_CODE (field) == FIELD_DECL) |
| 2399 | { |
| 2400 | int num; |
| 2401 | |
| 2402 | if (TREE_TYPE (field) == error_mark_node) |
| 2403 | continue; |
| 2404 | |
| 2405 | num = classify_argument (TYPE_MODE (TREE_TYPE (field)), |
| 2406 | TREE_TYPE (field), classes: subclasses, |
| 2407 | bit_offset, zero_width_bitfields); |
| 2408 | if (!num) |
| 2409 | return 0; |
| 2410 | for (i = 0; i < num && i < words; i++) |
| 2411 | classes[i] = merge_classes (class1: subclasses[i], class2: classes[i]); |
| 2412 | } |
| 2413 | } |
| 2414 | break; |
| 2415 | |
| 2416 | case BITINT_TYPE: |
| 2417 | /* _BitInt(N) for N > 64 is passed as structure containing |
| 2418 | (N + 63) / 64 64-bit elements. */ |
| 2419 | if (words > 2) |
| 2420 | return 0; |
| 2421 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
| 2422 | return 2; |
| 2423 | |
| 2424 | default: |
| 2425 | gcc_unreachable (); |
| 2426 | } |
| 2427 | |
| 2428 | if (words > 2) |
| 2429 | { |
| 2430 | /* When size > 16 bytes, if the first one isn't |
| 2431 | X86_64_SSE_CLASS or any other ones aren't |
| 2432 | X86_64_SSEUP_CLASS, everything should be passed in |
| 2433 | memory. */ |
| 2434 | if (classes[0] != X86_64_SSE_CLASS) |
| 2435 | return 0; |
| 2436 | |
| 2437 | for (i = 1; i < words; i++) |
| 2438 | if (classes[i] != X86_64_SSEUP_CLASS) |
| 2439 | return 0; |
| 2440 | } |
| 2441 | |
| 2442 | /* Final merger cleanup. */ |
| 2443 | for (i = 0; i < words; i++) |
| 2444 | { |
| 2445 | /* If one class is MEMORY, everything should be passed in |
| 2446 | memory. */ |
| 2447 | if (classes[i] == X86_64_MEMORY_CLASS) |
| 2448 | return 0; |
| 2449 | |
| 2450 | /* The X86_64_SSEUP_CLASS should be always preceded by |
| 2451 | X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ |
| 2452 | if (classes[i] == X86_64_SSEUP_CLASS |
| 2453 | && classes[i - 1] != X86_64_SSE_CLASS |
| 2454 | && classes[i - 1] != X86_64_SSEUP_CLASS) |
| 2455 | { |
| 2456 | /* The first one should never be X86_64_SSEUP_CLASS. */ |
| 2457 | gcc_assert (i != 0); |
| 2458 | classes[i] = X86_64_SSE_CLASS; |
| 2459 | } |
| 2460 | |
| 2461 | /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, |
| 2462 | everything should be passed in memory. */ |
| 2463 | if (classes[i] == X86_64_X87UP_CLASS |
| 2464 | && (classes[i - 1] != X86_64_X87_CLASS)) |
| 2465 | { |
| 2466 | static bool warned; |
| 2467 | |
| 2468 | /* The first one should never be X86_64_X87UP_CLASS. */ |
| 2469 | gcc_assert (i != 0); |
| 2470 | if (!warned && warn_psabi) |
| 2471 | { |
| 2472 | warned = true; |
| 2473 | inform (input_location, |
| 2474 | "the ABI of passing union with %<long double%>" |
| 2475 | " has changed in GCC 4.4" ); |
| 2476 | } |
| 2477 | return 0; |
| 2478 | } |
| 2479 | } |
| 2480 | return words; |
| 2481 | } |
| 2482 | |
| 2483 | /* Compute alignment needed. We align all types to natural boundaries with |
| 2484 | exception of XFmode that is aligned to 64bits. */ |
| 2485 | if (mode != VOIDmode && mode != BLKmode) |
| 2486 | { |
| 2487 | int mode_alignment = GET_MODE_BITSIZE (mode); |
| 2488 | |
| 2489 | if (mode == XFmode) |
| 2490 | mode_alignment = 128; |
| 2491 | else if (mode == XCmode) |
| 2492 | mode_alignment = 256; |
| 2493 | if (COMPLEX_MODE_P (mode)) |
| 2494 | mode_alignment /= 2; |
| 2495 | /* Misaligned fields are always returned in memory. */ |
| 2496 | if (bit_offset % mode_alignment) |
| 2497 | return 0; |
| 2498 | } |
| 2499 | |
| 2500 | /* for V1xx modes, just use the base mode */ |
| 2501 | if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode |
| 2502 | && GET_MODE_UNIT_SIZE (mode) == bytes) |
| 2503 | mode = GET_MODE_INNER (mode); |
| 2504 | |
| 2505 | /* Classification of atomic types. */ |
| 2506 | switch (mode) |
| 2507 | { |
| 2508 | case E_SDmode: |
| 2509 | case E_DDmode: |
| 2510 | classes[0] = X86_64_SSE_CLASS; |
| 2511 | return 1; |
| 2512 | case E_TDmode: |
| 2513 | classes[0] = X86_64_SSE_CLASS; |
| 2514 | classes[1] = X86_64_SSEUP_CLASS; |
| 2515 | return 2; |
| 2516 | case E_DImode: |
| 2517 | case E_SImode: |
| 2518 | case E_HImode: |
| 2519 | case E_QImode: |
| 2520 | case E_CSImode: |
| 2521 | case E_CHImode: |
| 2522 | case E_CQImode: |
| 2523 | { |
| 2524 | int size = bit_offset + (int) GET_MODE_BITSIZE (mode); |
| 2525 | |
| 2526 | /* Analyze last 128 bits only. */ |
| 2527 | size = (size - 1) & 0x7f; |
| 2528 | |
| 2529 | if (size < 32) |
| 2530 | { |
| 2531 | classes[0] = X86_64_INTEGERSI_CLASS; |
| 2532 | return 1; |
| 2533 | } |
| 2534 | else if (size < 64) |
| 2535 | { |
| 2536 | classes[0] = X86_64_INTEGER_CLASS; |
| 2537 | return 1; |
| 2538 | } |
| 2539 | else if (size < 64+32) |
| 2540 | { |
| 2541 | classes[0] = X86_64_INTEGER_CLASS; |
| 2542 | classes[1] = X86_64_INTEGERSI_CLASS; |
| 2543 | return 2; |
| 2544 | } |
| 2545 | else if (size < 64+64) |
| 2546 | { |
| 2547 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
| 2548 | return 2; |
| 2549 | } |
| 2550 | else |
| 2551 | gcc_unreachable (); |
| 2552 | } |
| 2553 | case E_CDImode: |
| 2554 | case E_TImode: |
| 2555 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
| 2556 | return 2; |
| 2557 | case E_COImode: |
| 2558 | case E_OImode: |
| 2559 | /* OImode shouldn't be used directly. */ |
| 2560 | gcc_unreachable (); |
| 2561 | case E_CTImode: |
| 2562 | return 0; |
| 2563 | case E_HFmode: |
| 2564 | case E_BFmode: |
| 2565 | if (!(bit_offset % 64)) |
| 2566 | classes[0] = X86_64_SSEHF_CLASS; |
| 2567 | else |
| 2568 | classes[0] = X86_64_SSE_CLASS; |
| 2569 | return 1; |
| 2570 | case E_SFmode: |
| 2571 | if (!(bit_offset % 64)) |
| 2572 | classes[0] = X86_64_SSESF_CLASS; |
| 2573 | else |
| 2574 | classes[0] = X86_64_SSE_CLASS; |
| 2575 | return 1; |
| 2576 | case E_DFmode: |
| 2577 | classes[0] = X86_64_SSEDF_CLASS; |
| 2578 | return 1; |
| 2579 | case E_XFmode: |
| 2580 | classes[0] = X86_64_X87_CLASS; |
| 2581 | classes[1] = X86_64_X87UP_CLASS; |
| 2582 | return 2; |
| 2583 | case E_TFmode: |
| 2584 | classes[0] = X86_64_SSE_CLASS; |
| 2585 | classes[1] = X86_64_SSEUP_CLASS; |
| 2586 | return 2; |
| 2587 | case E_HCmode: |
| 2588 | case E_BCmode: |
| 2589 | classes[0] = X86_64_SSE_CLASS; |
| 2590 | if (!(bit_offset % 64)) |
| 2591 | return 1; |
| 2592 | else |
| 2593 | { |
| 2594 | classes[1] = X86_64_SSEHF_CLASS; |
| 2595 | return 2; |
| 2596 | } |
| 2597 | case E_SCmode: |
| 2598 | classes[0] = X86_64_SSE_CLASS; |
| 2599 | if (!(bit_offset % 64)) |
| 2600 | return 1; |
| 2601 | else |
| 2602 | { |
| 2603 | static bool warned; |
| 2604 | |
| 2605 | if (!warned && warn_psabi) |
| 2606 | { |
| 2607 | warned = true; |
| 2608 | inform (input_location, |
| 2609 | "the ABI of passing structure with %<complex float%>" |
| 2610 | " member has changed in GCC 4.4" ); |
| 2611 | } |
| 2612 | classes[1] = X86_64_SSESF_CLASS; |
| 2613 | return 2; |
| 2614 | } |
| 2615 | case E_DCmode: |
| 2616 | classes[0] = X86_64_SSEDF_CLASS; |
| 2617 | classes[1] = X86_64_SSEDF_CLASS; |
| 2618 | return 2; |
| 2619 | case E_XCmode: |
| 2620 | classes[0] = X86_64_COMPLEX_X87_CLASS; |
| 2621 | return 1; |
| 2622 | case E_TCmode: |
| 2623 | /* This modes is larger than 16 bytes. */ |
| 2624 | return 0; |
| 2625 | case E_V8SFmode: |
| 2626 | case E_V8SImode: |
| 2627 | case E_V32QImode: |
| 2628 | case E_V16HFmode: |
| 2629 | case E_V16BFmode: |
| 2630 | case E_V16HImode: |
| 2631 | case E_V4DFmode: |
| 2632 | case E_V4DImode: |
| 2633 | classes[0] = X86_64_SSE_CLASS; |
| 2634 | classes[1] = X86_64_SSEUP_CLASS; |
| 2635 | classes[2] = X86_64_SSEUP_CLASS; |
| 2636 | classes[3] = X86_64_SSEUP_CLASS; |
| 2637 | return 4; |
| 2638 | case E_V8DFmode: |
| 2639 | case E_V16SFmode: |
| 2640 | case E_V32HFmode: |
| 2641 | case E_V32BFmode: |
| 2642 | case E_V8DImode: |
| 2643 | case E_V16SImode: |
| 2644 | case E_V32HImode: |
| 2645 | case E_V64QImode: |
| 2646 | classes[0] = X86_64_SSE_CLASS; |
| 2647 | classes[1] = X86_64_SSEUP_CLASS; |
| 2648 | classes[2] = X86_64_SSEUP_CLASS; |
| 2649 | classes[3] = X86_64_SSEUP_CLASS; |
| 2650 | classes[4] = X86_64_SSEUP_CLASS; |
| 2651 | classes[5] = X86_64_SSEUP_CLASS; |
| 2652 | classes[6] = X86_64_SSEUP_CLASS; |
| 2653 | classes[7] = X86_64_SSEUP_CLASS; |
| 2654 | return 8; |
| 2655 | case E_V4SFmode: |
| 2656 | case E_V4SImode: |
| 2657 | case E_V16QImode: |
| 2658 | case E_V8HImode: |
| 2659 | case E_V8HFmode: |
| 2660 | case E_V8BFmode: |
| 2661 | case E_V2DFmode: |
| 2662 | case E_V2DImode: |
| 2663 | classes[0] = X86_64_SSE_CLASS; |
| 2664 | classes[1] = X86_64_SSEUP_CLASS; |
| 2665 | return 2; |
| 2666 | case E_V1TImode: |
| 2667 | case E_V1DImode: |
| 2668 | case E_V2SFmode: |
| 2669 | case E_V2SImode: |
| 2670 | case E_V4HImode: |
| 2671 | case E_V4HFmode: |
| 2672 | case E_V4BFmode: |
| 2673 | case E_V2HFmode: |
| 2674 | case E_V2BFmode: |
| 2675 | case E_V8QImode: |
| 2676 | classes[0] = X86_64_SSE_CLASS; |
| 2677 | return 1; |
| 2678 | case E_BLKmode: |
| 2679 | case E_VOIDmode: |
| 2680 | return 0; |
| 2681 | default: |
| 2682 | gcc_assert (VECTOR_MODE_P (mode)); |
| 2683 | |
| 2684 | if (bytes > 16) |
| 2685 | return 0; |
| 2686 | |
| 2687 | gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); |
| 2688 | |
| 2689 | if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) |
| 2690 | classes[0] = X86_64_INTEGERSI_CLASS; |
| 2691 | else |
| 2692 | classes[0] = X86_64_INTEGER_CLASS; |
| 2693 | classes[1] = X86_64_INTEGER_CLASS; |
| 2694 | return 1 + (bytes > 8); |
| 2695 | } |
| 2696 | } |
| 2697 | |
| 2698 | /* Wrapper around classify_argument with the extra zero_width_bitfields |
| 2699 | argument, to diagnose GCC 12.1 ABI differences for C. */ |
| 2700 | |
| 2701 | static int |
| 2702 | classify_argument (machine_mode mode, const_tree type, |
| 2703 | enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) |
| 2704 | { |
| 2705 | int zero_width_bitfields = 0; |
| 2706 | static bool warned = false; |
| 2707 | int n = classify_argument (mode, type, classes, bit_offset, |
| 2708 | zero_width_bitfields); |
| 2709 | if (!zero_width_bitfields || warned || !warn_psabi) |
| 2710 | return n; |
| 2711 | enum x86_64_reg_class alt_classes[MAX_CLASSES]; |
| 2712 | zero_width_bitfields = 2; |
| 2713 | if (classify_argument (mode, type, classes: alt_classes, bit_offset, |
| 2714 | zero_width_bitfields) != n) |
| 2715 | zero_width_bitfields = 3; |
| 2716 | else |
| 2717 | for (int i = 0; i < n; i++) |
| 2718 | if (classes[i] != alt_classes[i]) |
| 2719 | { |
| 2720 | zero_width_bitfields = 3; |
| 2721 | break; |
| 2722 | } |
| 2723 | if (zero_width_bitfields == 3) |
| 2724 | { |
| 2725 | warned = true; |
| 2726 | const char *url |
| 2727 | = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields" ; |
| 2728 | |
| 2729 | inform (input_location, |
| 2730 | "the ABI of passing C structures with zero-width bit-fields" |
| 2731 | " has changed in GCC %{12.1%}" , url); |
| 2732 | } |
| 2733 | return n; |
| 2734 | } |
| 2735 | |
| 2736 | /* Examine the argument and return set number of register required in each |
| 2737 | class. Return true iff parameter should be passed in memory. */ |
| 2738 | |
| 2739 | static bool |
| 2740 | examine_argument (machine_mode mode, const_tree type, int in_return, |
| 2741 | int *int_nregs, int *sse_nregs) |
| 2742 | { |
| 2743 | enum x86_64_reg_class regclass[MAX_CLASSES]; |
| 2744 | int n = classify_argument (mode, type, classes: regclass, bit_offset: 0); |
| 2745 | |
| 2746 | *int_nregs = 0; |
| 2747 | *sse_nregs = 0; |
| 2748 | |
| 2749 | if (!n) |
| 2750 | return true; |
| 2751 | for (n--; n >= 0; n--) |
| 2752 | switch (regclass[n]) |
| 2753 | { |
| 2754 | case X86_64_INTEGER_CLASS: |
| 2755 | case X86_64_INTEGERSI_CLASS: |
| 2756 | (*int_nregs)++; |
| 2757 | break; |
| 2758 | case X86_64_SSE_CLASS: |
| 2759 | case X86_64_SSEHF_CLASS: |
| 2760 | case X86_64_SSESF_CLASS: |
| 2761 | case X86_64_SSEDF_CLASS: |
| 2762 | (*sse_nregs)++; |
| 2763 | break; |
| 2764 | case X86_64_NO_CLASS: |
| 2765 | case X86_64_SSEUP_CLASS: |
| 2766 | break; |
| 2767 | case X86_64_X87_CLASS: |
| 2768 | case X86_64_X87UP_CLASS: |
| 2769 | case X86_64_COMPLEX_X87_CLASS: |
| 2770 | if (!in_return) |
| 2771 | return true; |
| 2772 | break; |
| 2773 | case X86_64_MEMORY_CLASS: |
| 2774 | gcc_unreachable (); |
| 2775 | } |
| 2776 | |
| 2777 | return false; |
| 2778 | } |
| 2779 | |
| 2780 | /* Construct container for the argument used by GCC interface. See |
| 2781 | FUNCTION_ARG for the detailed description. */ |
| 2782 | |
| 2783 | static rtx |
| 2784 | construct_container (machine_mode mode, machine_mode orig_mode, |
| 2785 | const_tree type, int in_return, int nintregs, int nsseregs, |
| 2786 | const int *intreg, int sse_regno) |
| 2787 | { |
| 2788 | /* The following variables hold the static issued_error state. */ |
| 2789 | static bool issued_sse_arg_error; |
| 2790 | static bool issued_sse_ret_error; |
| 2791 | static bool issued_x87_ret_error; |
| 2792 | |
| 2793 | machine_mode tmpmode; |
| 2794 | int bytes |
| 2795 | = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| 2796 | enum x86_64_reg_class regclass[MAX_CLASSES]; |
| 2797 | int n; |
| 2798 | int i; |
| 2799 | int nexps = 0; |
| 2800 | int needed_sseregs, needed_intregs; |
| 2801 | rtx exp[MAX_CLASSES]; |
| 2802 | rtx ret; |
| 2803 | |
| 2804 | n = classify_argument (mode, type, classes: regclass, bit_offset: 0); |
| 2805 | if (!n) |
| 2806 | return NULL; |
| 2807 | if (examine_argument (mode, type, in_return, int_nregs: &needed_intregs, |
| 2808 | sse_nregs: &needed_sseregs)) |
| 2809 | return NULL; |
| 2810 | if (needed_intregs > nintregs || needed_sseregs > nsseregs) |
| 2811 | return NULL; |
| 2812 | |
| 2813 | /* We allowed the user to turn off SSE for kernel mode. Don't crash if |
| 2814 | some less clueful developer tries to use floating-point anyway. */ |
| 2815 | if (needed_sseregs |
| 2816 | && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2))) |
| 2817 | { |
| 2818 | /* Return early if we shouldn't raise an error for invalid |
| 2819 | calls. */ |
| 2820 | if (cfun != NULL && cfun->machine->silent_p) |
| 2821 | return NULL; |
| 2822 | if (in_return) |
| 2823 | { |
| 2824 | if (!issued_sse_ret_error) |
| 2825 | { |
| 2826 | if (VALID_SSE2_TYPE_MODE (mode)) |
| 2827 | error ("SSE register return with SSE2 disabled" ); |
| 2828 | else |
| 2829 | error ("SSE register return with SSE disabled" ); |
| 2830 | issued_sse_ret_error = true; |
| 2831 | } |
| 2832 | } |
| 2833 | else if (!issued_sse_arg_error) |
| 2834 | { |
| 2835 | if (VALID_SSE2_TYPE_MODE (mode)) |
| 2836 | error ("SSE register argument with SSE2 disabled" ); |
| 2837 | else |
| 2838 | error ("SSE register argument with SSE disabled" ); |
| 2839 | issued_sse_arg_error = true; |
| 2840 | } |
| 2841 | return NULL; |
| 2842 | } |
| 2843 | |
| 2844 | /* Likewise, error if the ABI requires us to return values in the |
| 2845 | x87 registers and the user specified -mno-80387. */ |
| 2846 | if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) |
| 2847 | for (i = 0; i < n; i++) |
| 2848 | if (regclass[i] == X86_64_X87_CLASS |
| 2849 | || regclass[i] == X86_64_X87UP_CLASS |
| 2850 | || regclass[i] == X86_64_COMPLEX_X87_CLASS) |
| 2851 | { |
| 2852 | /* Return early if we shouldn't raise an error for invalid |
| 2853 | calls. */ |
| 2854 | if (cfun != NULL && cfun->machine->silent_p) |
| 2855 | return NULL; |
| 2856 | if (!issued_x87_ret_error) |
| 2857 | { |
| 2858 | error ("x87 register return with x87 disabled" ); |
| 2859 | issued_x87_ret_error = true; |
| 2860 | } |
| 2861 | return NULL; |
| 2862 | } |
| 2863 | |
| 2864 | /* First construct simple cases. Avoid SCmode, since we want to use |
| 2865 | single register to pass this type. */ |
| 2866 | if (n == 1 && mode != SCmode && mode != HCmode) |
| 2867 | switch (regclass[0]) |
| 2868 | { |
| 2869 | case X86_64_INTEGER_CLASS: |
| 2870 | case X86_64_INTEGERSI_CLASS: |
| 2871 | return gen_rtx_REG (mode, intreg[0]); |
| 2872 | case X86_64_SSE_CLASS: |
| 2873 | case X86_64_SSEHF_CLASS: |
| 2874 | case X86_64_SSESF_CLASS: |
| 2875 | case X86_64_SSEDF_CLASS: |
| 2876 | if (mode != BLKmode) |
| 2877 | return gen_reg_or_parallel (mode, orig_mode, |
| 2878 | GET_SSE_REGNO (sse_regno)); |
| 2879 | break; |
| 2880 | case X86_64_X87_CLASS: |
| 2881 | case X86_64_COMPLEX_X87_CLASS: |
| 2882 | return gen_rtx_REG (mode, FIRST_STACK_REG); |
| 2883 | case X86_64_NO_CLASS: |
| 2884 | /* Zero sized array, struct or class. */ |
| 2885 | return NULL; |
| 2886 | default: |
| 2887 | gcc_unreachable (); |
| 2888 | } |
| 2889 | if (n == 2 |
| 2890 | && regclass[0] == X86_64_SSE_CLASS |
| 2891 | && regclass[1] == X86_64_SSEUP_CLASS |
| 2892 | && mode != BLKmode) |
| 2893 | return gen_reg_or_parallel (mode, orig_mode, |
| 2894 | GET_SSE_REGNO (sse_regno)); |
| 2895 | if (n == 4 |
| 2896 | && regclass[0] == X86_64_SSE_CLASS |
| 2897 | && regclass[1] == X86_64_SSEUP_CLASS |
| 2898 | && regclass[2] == X86_64_SSEUP_CLASS |
| 2899 | && regclass[3] == X86_64_SSEUP_CLASS |
| 2900 | && mode != BLKmode) |
| 2901 | return gen_reg_or_parallel (mode, orig_mode, |
| 2902 | GET_SSE_REGNO (sse_regno)); |
| 2903 | if (n == 8 |
| 2904 | && regclass[0] == X86_64_SSE_CLASS |
| 2905 | && regclass[1] == X86_64_SSEUP_CLASS |
| 2906 | && regclass[2] == X86_64_SSEUP_CLASS |
| 2907 | && regclass[3] == X86_64_SSEUP_CLASS |
| 2908 | && regclass[4] == X86_64_SSEUP_CLASS |
| 2909 | && regclass[5] == X86_64_SSEUP_CLASS |
| 2910 | && regclass[6] == X86_64_SSEUP_CLASS |
| 2911 | && regclass[7] == X86_64_SSEUP_CLASS |
| 2912 | && mode != BLKmode) |
| 2913 | return gen_reg_or_parallel (mode, orig_mode, |
| 2914 | GET_SSE_REGNO (sse_regno)); |
| 2915 | if (n == 2 |
| 2916 | && regclass[0] == X86_64_X87_CLASS |
| 2917 | && regclass[1] == X86_64_X87UP_CLASS) |
| 2918 | return gen_rtx_REG (XFmode, FIRST_STACK_REG); |
| 2919 | |
| 2920 | if (n == 2 |
| 2921 | && regclass[0] == X86_64_INTEGER_CLASS |
| 2922 | && regclass[1] == X86_64_INTEGER_CLASS |
| 2923 | && (mode == CDImode || mode == TImode || mode == BLKmode) |
| 2924 | && intreg[0] + 1 == intreg[1]) |
| 2925 | { |
| 2926 | if (mode == BLKmode) |
| 2927 | { |
| 2928 | /* Use TImode for BLKmode values in 2 integer registers. */ |
| 2929 | exp[0] = gen_rtx_EXPR_LIST (VOIDmode, |
| 2930 | gen_rtx_REG (TImode, intreg[0]), |
| 2931 | GEN_INT (0)); |
| 2932 | ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1)); |
| 2933 | XVECEXP (ret, 0, 0) = exp[0]; |
| 2934 | return ret; |
| 2935 | } |
| 2936 | else |
| 2937 | return gen_rtx_REG (mode, intreg[0]); |
| 2938 | } |
| 2939 | |
| 2940 | /* Otherwise figure out the entries of the PARALLEL. */ |
| 2941 | for (i = 0; i < n; i++) |
| 2942 | { |
| 2943 | int pos; |
| 2944 | |
| 2945 | switch (regclass[i]) |
| 2946 | { |
| 2947 | case X86_64_NO_CLASS: |
| 2948 | break; |
| 2949 | case X86_64_INTEGER_CLASS: |
| 2950 | case X86_64_INTEGERSI_CLASS: |
| 2951 | /* Merge TImodes on aligned occasions here too. */ |
| 2952 | if (i * 8 + 8 > bytes) |
| 2953 | { |
| 2954 | unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT; |
| 2955 | if (!int_mode_for_size (size: tmpbits, limit: 0).exists (mode: &tmpmode)) |
| 2956 | /* We've requested 24 bytes we |
| 2957 | don't have mode for. Use DImode. */ |
| 2958 | tmpmode = DImode; |
| 2959 | } |
| 2960 | else if (regclass[i] == X86_64_INTEGERSI_CLASS) |
| 2961 | tmpmode = SImode; |
| 2962 | else |
| 2963 | tmpmode = DImode; |
| 2964 | exp [nexps++] |
| 2965 | = gen_rtx_EXPR_LIST (VOIDmode, |
| 2966 | gen_rtx_REG (tmpmode, *intreg), |
| 2967 | GEN_INT (i*8)); |
| 2968 | intreg++; |
| 2969 | break; |
| 2970 | case X86_64_SSEHF_CLASS: |
| 2971 | tmpmode = (mode == BFmode ? BFmode : HFmode); |
| 2972 | exp [nexps++] |
| 2973 | = gen_rtx_EXPR_LIST (VOIDmode, |
| 2974 | gen_rtx_REG (tmpmode, |
| 2975 | GET_SSE_REGNO (sse_regno)), |
| 2976 | GEN_INT (i*8)); |
| 2977 | sse_regno++; |
| 2978 | break; |
| 2979 | case X86_64_SSESF_CLASS: |
| 2980 | exp [nexps++] |
| 2981 | = gen_rtx_EXPR_LIST (VOIDmode, |
| 2982 | gen_rtx_REG (SFmode, |
| 2983 | GET_SSE_REGNO (sse_regno)), |
| 2984 | GEN_INT (i*8)); |
| 2985 | sse_regno++; |
| 2986 | break; |
| 2987 | case X86_64_SSEDF_CLASS: |
| 2988 | exp [nexps++] |
| 2989 | = gen_rtx_EXPR_LIST (VOIDmode, |
| 2990 | gen_rtx_REG (DFmode, |
| 2991 | GET_SSE_REGNO (sse_regno)), |
| 2992 | GEN_INT (i*8)); |
| 2993 | sse_regno++; |
| 2994 | break; |
| 2995 | case X86_64_SSE_CLASS: |
| 2996 | pos = i; |
| 2997 | switch (n) |
| 2998 | { |
| 2999 | case 1: |
| 3000 | tmpmode = DImode; |
| 3001 | break; |
| 3002 | case 2: |
| 3003 | if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) |
| 3004 | { |
| 3005 | tmpmode = TImode; |
| 3006 | i++; |
| 3007 | } |
| 3008 | else |
| 3009 | tmpmode = DImode; |
| 3010 | break; |
| 3011 | case 4: |
| 3012 | gcc_assert (i == 0 |
| 3013 | && regclass[1] == X86_64_SSEUP_CLASS |
| 3014 | && regclass[2] == X86_64_SSEUP_CLASS |
| 3015 | && regclass[3] == X86_64_SSEUP_CLASS); |
| 3016 | tmpmode = OImode; |
| 3017 | i += 3; |
| 3018 | break; |
| 3019 | case 8: |
| 3020 | gcc_assert (i == 0 |
| 3021 | && regclass[1] == X86_64_SSEUP_CLASS |
| 3022 | && regclass[2] == X86_64_SSEUP_CLASS |
| 3023 | && regclass[3] == X86_64_SSEUP_CLASS |
| 3024 | && regclass[4] == X86_64_SSEUP_CLASS |
| 3025 | && regclass[5] == X86_64_SSEUP_CLASS |
| 3026 | && regclass[6] == X86_64_SSEUP_CLASS |
| 3027 | && regclass[7] == X86_64_SSEUP_CLASS); |
| 3028 | tmpmode = XImode; |
| 3029 | i += 7; |
| 3030 | break; |
| 3031 | default: |
| 3032 | gcc_unreachable (); |
| 3033 | } |
| 3034 | exp [nexps++] |
| 3035 | = gen_rtx_EXPR_LIST (VOIDmode, |
| 3036 | gen_rtx_REG (tmpmode, |
| 3037 | GET_SSE_REGNO (sse_regno)), |
| 3038 | GEN_INT (pos*8)); |
| 3039 | sse_regno++; |
| 3040 | break; |
| 3041 | default: |
| 3042 | gcc_unreachable (); |
| 3043 | } |
| 3044 | } |
| 3045 | |
| 3046 | /* Empty aligned struct, union or class. */ |
| 3047 | if (nexps == 0) |
| 3048 | return NULL; |
| 3049 | |
| 3050 | ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); |
| 3051 | for (i = 0; i < nexps; i++) |
| 3052 | XVECEXP (ret, 0, i) = exp [i]; |
| 3053 | return ret; |
| 3054 | } |
| 3055 | |
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   BYTES is the argument's size in bytes, WORDS the same rounded up to
   whole words.  Return a number of integer registers advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* Variable-sized BLKmode (bytes < 0) is never passed in regs.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    pass_in_reg:
      /* Integer-like argument: consume WORDS general registers; when
	 they are exhausted mark the remainder as stack-passed.  */
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      /* DFmode goes in SSE regs only for float_in_sse >= 2; a value of
	 -1 flags the SSE-without-SSE2 error reported below.  */
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      /* SFmode goes in SSE regs for float_in_sse >= 1.  */
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* SSE vector (and SSE-passed scalar FP) arguments: consume one
	 SSE register, but only for non-aggregates.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 64-bit vector arguments: consume one MMX register, again only
	 for non-aggregates.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled" , cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called" );
    }

  return res;
}
| 3194 | |
| 3195 | static int |
| 3196 | function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, |
| 3197 | const_tree type, HOST_WIDE_INT words, bool named) |
| 3198 | { |
| 3199 | int int_nregs, sse_nregs; |
| 3200 | |
| 3201 | /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ |
| 3202 | if (!named && (VALID_AVX512F_REG_MODE (mode) |
| 3203 | || VALID_AVX256_REG_MODE (mode))) |
| 3204 | return 0; |
| 3205 | |
| 3206 | if (!examine_argument (mode, type, in_return: 0, int_nregs: &int_nregs, sse_nregs: &sse_nregs) |
| 3207 | && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
| 3208 | { |
| 3209 | cum->nregs -= int_nregs; |
| 3210 | cum->sse_nregs -= sse_nregs; |
| 3211 | cum->regno += int_nregs; |
| 3212 | cum->sse_regno += sse_nregs; |
| 3213 | return int_nregs; |
| 3214 | } |
| 3215 | else |
| 3216 | { |
| 3217 | int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; |
| 3218 | cum->words = ROUND_UP (cum->words, align); |
| 3219 | cum->words += words; |
| 3220 | return 0; |
| 3221 | } |
| 3222 | } |
| 3223 | |
| 3224 | static int |
| 3225 | function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, |
| 3226 | HOST_WIDE_INT words) |
| 3227 | { |
| 3228 | /* Otherwise, this should be passed indirect. */ |
| 3229 | gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); |
| 3230 | |
| 3231 | cum->words += words; |
| 3232 | if (cum->nregs > 0) |
| 3233 | { |
| 3234 | cum->nregs -= 1; |
| 3235 | cum->regno += 1; |
| 3236 | return 1; |
| 3237 | } |
| 3238 | return 0; |
| 3239 | } |
| 3240 | |
| 3241 | /* Update the data in CUM to advance over argument ARG. */ |
| 3242 | |
| 3243 | static void |
| 3244 | ix86_function_arg_advance (cumulative_args_t cum_v, |
| 3245 | const function_arg_info &arg) |
| 3246 | { |
| 3247 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
| 3248 | machine_mode mode = arg.mode; |
| 3249 | HOST_WIDE_INT bytes, words; |
| 3250 | int nregs; |
| 3251 | |
| 3252 | /* The argument of interrupt handler is a special case and is |
| 3253 | handled in ix86_function_arg. */ |
| 3254 | if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) |
| 3255 | return; |
| 3256 | |
| 3257 | bytes = arg.promoted_size_in_bytes (); |
| 3258 | words = CEIL (bytes, UNITS_PER_WORD); |
| 3259 | |
| 3260 | if (arg.type) |
| 3261 | mode = type_natural_mode (type: arg.type, NULL, in_return: false); |
| 3262 | |
| 3263 | if (TARGET_64BIT) |
| 3264 | { |
| 3265 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
| 3266 | |
| 3267 | if (call_abi == MS_ABI) |
| 3268 | nregs = function_arg_advance_ms_64 (cum, bytes, words); |
| 3269 | else |
| 3270 | nregs = function_arg_advance_64 (cum, mode, type: arg.type, words, |
| 3271 | named: arg.named); |
| 3272 | } |
| 3273 | else |
| 3274 | nregs = function_arg_advance_32 (cum, mode, type: arg.type, bytes, words); |
| 3275 | |
| 3276 | if (!nregs) |
| 3277 | { |
| 3278 | /* Track if there are outgoing arguments on stack. */ |
| 3279 | if (cum->caller) |
| 3280 | cfun->machine->outgoing_args_on_stack = true; |
| 3281 | } |
| 3282 | } |
| 3283 | |
| 3284 | /* Define where to put the arguments to a function. |
| 3285 | Value is zero to push the argument on the stack, |
| 3286 | or a hard register in which to store the argument. |
| 3287 | |
| 3288 | MODE is the argument's machine mode. |
| 3289 | TYPE is the data type of the argument (as a tree). |
| 3290 | This is null for libcalls where that information may |
| 3291 | not be available. |
| 3292 | CUM is a variable of type CUMULATIVE_ARGS which gives info about |
| 3293 | the preceding args and about the function being called. |
| 3294 | NAMED is nonzero if this argument is a named parameter |
| 3295 | (otherwise it is an extra parameter matching an ellipsis). */ |
| 3296 | |
| 3297 | static rtx |
| 3298 | function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, |
| 3299 | machine_mode orig_mode, const_tree type, |
| 3300 | HOST_WIDE_INT bytes, HOST_WIDE_INT words) |
| 3301 | { |
| 3302 | bool error_p = false; |
| 3303 | |
| 3304 | /* Avoid the AL settings for the Unix64 ABI. */ |
| 3305 | if (mode == VOIDmode) |
| 3306 | return constm1_rtx; |
| 3307 | |
| 3308 | if (TARGET_IAMCU) |
| 3309 | { |
| 3310 | /* Intel MCU psABI passes scalars and aggregates no larger than 8 |
| 3311 | bytes in registers. */ |
| 3312 | if (!VECTOR_MODE_P (mode) && bytes <= 8) |
| 3313 | goto pass_in_reg; |
| 3314 | return NULL_RTX; |
| 3315 | } |
| 3316 | |
| 3317 | switch (mode) |
| 3318 | { |
| 3319 | default: |
| 3320 | break; |
| 3321 | |
| 3322 | case E_BLKmode: |
| 3323 | if (bytes < 0) |
| 3324 | break; |
| 3325 | /* FALLTHRU */ |
| 3326 | case E_DImode: |
| 3327 | case E_SImode: |
| 3328 | case E_HImode: |
| 3329 | case E_QImode: |
| 3330 | pass_in_reg: |
| 3331 | if (words <= cum->nregs) |
| 3332 | { |
| 3333 | int regno = cum->regno; |
| 3334 | |
| 3335 | /* Fastcall allocates the first two DWORD (SImode) or |
| 3336 | smaller arguments to ECX and EDX if it isn't an |
| 3337 | aggregate type . */ |
| 3338 | if (cum->fastcall) |
| 3339 | { |
| 3340 | if (mode == BLKmode |
| 3341 | || mode == DImode |
| 3342 | || (type && AGGREGATE_TYPE_P (type))) |
| 3343 | break; |
| 3344 | |
| 3345 | /* ECX not EAX is the first allocated register. */ |
| 3346 | if (regno == AX_REG) |
| 3347 | regno = CX_REG; |
| 3348 | } |
| 3349 | return gen_rtx_REG (mode, regno); |
| 3350 | } |
| 3351 | break; |
| 3352 | |
| 3353 | case E_DFmode: |
| 3354 | if (cum->float_in_sse == -1) |
| 3355 | error_p = true; |
| 3356 | if (cum->float_in_sse < 2) |
| 3357 | break; |
| 3358 | /* FALLTHRU */ |
| 3359 | case E_SFmode: |
| 3360 | if (cum->float_in_sse == -1) |
| 3361 | error_p = true; |
| 3362 | if (cum->float_in_sse < 1) |
| 3363 | break; |
| 3364 | /* FALLTHRU */ |
| 3365 | case E_TImode: |
| 3366 | /* In 32bit, we pass TImode in xmm registers. */ |
| 3367 | case E_V16QImode: |
| 3368 | case E_V8HImode: |
| 3369 | case E_V4SImode: |
| 3370 | case E_V2DImode: |
| 3371 | case E_V8HFmode: |
| 3372 | case E_V8BFmode: |
| 3373 | case E_V4SFmode: |
| 3374 | case E_V2DFmode: |
| 3375 | if (!type || !AGGREGATE_TYPE_P (type)) |
| 3376 | { |
| 3377 | if (cum->sse_nregs) |
| 3378 | return gen_reg_or_parallel (mode, orig_mode, |
| 3379 | regno: cum->sse_regno + FIRST_SSE_REG); |
| 3380 | } |
| 3381 | break; |
| 3382 | |
| 3383 | case E_OImode: |
| 3384 | case E_XImode: |
| 3385 | /* OImode and XImode shouldn't be used directly. */ |
| 3386 | gcc_unreachable (); |
| 3387 | |
| 3388 | case E_V64QImode: |
| 3389 | case E_V32HImode: |
| 3390 | case E_V16SImode: |
| 3391 | case E_V8DImode: |
| 3392 | case E_V32HFmode: |
| 3393 | case E_V32BFmode: |
| 3394 | case E_V16SFmode: |
| 3395 | case E_V8DFmode: |
| 3396 | case E_V16HFmode: |
| 3397 | case E_V16BFmode: |
| 3398 | case E_V8SFmode: |
| 3399 | case E_V8SImode: |
| 3400 | case E_V32QImode: |
| 3401 | case E_V16HImode: |
| 3402 | case E_V4DFmode: |
| 3403 | case E_V4DImode: |
| 3404 | if (!type || !AGGREGATE_TYPE_P (type)) |
| 3405 | { |
| 3406 | if (cum->sse_nregs) |
| 3407 | return gen_reg_or_parallel (mode, orig_mode, |
| 3408 | regno: cum->sse_regno + FIRST_SSE_REG); |
| 3409 | } |
| 3410 | break; |
| 3411 | |
| 3412 | case E_V8QImode: |
| 3413 | case E_V4HImode: |
| 3414 | case E_V4HFmode: |
| 3415 | case E_V4BFmode: |
| 3416 | case E_V2SImode: |
| 3417 | case E_V2SFmode: |
| 3418 | case E_V1TImode: |
| 3419 | case E_V1DImode: |
| 3420 | if (!type || !AGGREGATE_TYPE_P (type)) |
| 3421 | { |
| 3422 | if (cum->mmx_nregs) |
| 3423 | return gen_reg_or_parallel (mode, orig_mode, |
| 3424 | regno: cum->mmx_regno + FIRST_MMX_REG); |
| 3425 | } |
| 3426 | break; |
| 3427 | } |
| 3428 | if (error_p) |
| 3429 | { |
| 3430 | cum->float_in_sse = 0; |
| 3431 | error ("calling %qD with SSE calling convention without " |
| 3432 | "SSE/SSE2 enabled" , cum->decl); |
| 3433 | sorry ("this is a GCC bug that can be worked around by adding " |
| 3434 | "attribute used to function called" ); |
| 3435 | } |
| 3436 | |
| 3437 | return NULL_RTX; |
| 3438 | } |
| 3439 | |
/* Return the rtx holding an argument of MODE/ORIG_MODE and TYPE for the
   64-bit System V ABI, or NULL_RTX to pass the argument on the stack.
   CUM describes the integer/SSE registers consumed so far; NAMED is
   false for trailing unnamed arguments of a varargs call.  */

static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  /* The preserve_none ABI uses its own set of integer argument
     registers; pick the right table before packing the argument.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  return construct_container (mode, orig_mode, type, in_return: 0, nintregs: cum->nregs,
			      nsseregs: cum->sse_nregs,
			      intreg: &parm_regs[cum->regno],
			      sse_regno: cum->sse_regno);
}
| 3491 | |
/* Return the register in which to pass an argument of MODE/ORIG_MODE
   under the Microsoft x64 calling convention, or NULL_RTX to pass it
   on the stack.  CUM tracks registers consumed so far, NAMED is false
   for unnamed varargs arguments, TYPE is the argument type (may be
   NULL for libcalls) and BYTES its size in bytes.  */

static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		    machine_mode orig_mode, bool named, const_tree type,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  /* MS ABI assigns each argument the integer register corresponding to
     its positional slot, whatever its class.  */
  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes less than 64 bits are passed in anything but
     integer regs.  Larger floating point types are excluded as the Windows
     ABI requires vreg args can be shadowed in GPRs (for red zone / varargs).  */
  if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
    {
      if (named)
	{
	  /* A named scalar FP argument goes in the slot's XMM register
	     unless it is wrapped in an aggregate.  */
	  if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
	    regno = cum->regno + FIRST_SSE_REG;
	}
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      /* Small aggregates travel in a GPR of matching width; anything
	 else that is still BLKmode is passed as DImode.  */
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
| 3544 | |
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   ARG describes the argument while CUM gives information about the
   preceding args and about the function being called.  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  rtx reg;

  /* Interrupt/exception handlers receive their "arguments" at fixed
     stack slots rather than via the normal calling convention.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (arg.type != NULL_TREE);
      if (POINTER_TYPE_P (arg.type))
	{
	  /* This is the pointer argument.  */
	  gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
	  /* It is at -WORD(AP) in the current frame in interrupt and
	     exception handlers.  */
	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
	}
      else
	{
	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
		      && TREE_CODE (arg.type) == INTEGER_TYPE
		      && TYPE_MODE (arg.type) == word_mode);
	  /* The error code is the word-mode integer argument at
	     -2 * WORD(AP) in the current frame of the exception
	     handler.  */
	  reg = gen_rtx_MEM (word_mode,
			     plus_constant (Pmode,
					    arg_pointer_rtx,
					    -2 * UNITS_PER_WORD));
	}
      return reg;
    }

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (arg.type && VECTOR_TYPE_P (arg.type))
    mode = type_natural_mode (type: arg.type, cum, in_return: false);

  /* Dispatch on the ABI in effect for this particular call.  */
  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	reg = function_arg_ms_64 (cum, mode, orig_mode: arg.mode, named: arg.named,
				  type: arg.type, bytes);
      else
	reg = function_arg_64 (cum, mode, orig_mode: arg.mode, type: arg.type, named: arg.named);
    }
  else
    reg = function_arg_32 (cum, mode, orig_mode: arg.mode, type: arg.type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (reg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return reg;
}
| 3613 | |
| 3614 | /* A C expression that indicates when an argument must be passed by |
| 3615 | reference. If nonzero for an argument, a copy of that argument is |
| 3616 | made in memory and a pointer to the argument is passed instead of |
| 3617 | the argument itself. The pointer is passed in whatever way is |
| 3618 | appropriate for passing a pointer to that type. */ |
| 3619 | |
| 3620 | static bool |
| 3621 | ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) |
| 3622 | { |
| 3623 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
| 3624 | |
| 3625 | if (TARGET_64BIT) |
| 3626 | { |
| 3627 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
| 3628 | |
| 3629 | /* See Windows x64 Software Convention. */ |
| 3630 | if (call_abi == MS_ABI) |
| 3631 | { |
| 3632 | HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode); |
| 3633 | |
| 3634 | if (tree type = arg.type) |
| 3635 | { |
| 3636 | /* Arrays are passed by reference. */ |
| 3637 | if (TREE_CODE (type) == ARRAY_TYPE) |
| 3638 | return true; |
| 3639 | |
| 3640 | if (RECORD_OR_UNION_TYPE_P (type)) |
| 3641 | { |
| 3642 | /* Structs/unions of sizes other than 8, 16, 32, or 64 bits |
| 3643 | are passed by reference. */ |
| 3644 | msize = int_size_in_bytes (type); |
| 3645 | } |
| 3646 | } |
| 3647 | |
| 3648 | /* __m128 is passed by reference. */ |
| 3649 | return msize != 1 && msize != 2 && msize != 4 && msize != 8; |
| 3650 | } |
| 3651 | else if (arg.type && int_size_in_bytes (arg.type) == -1) |
| 3652 | return true; |
| 3653 | } |
| 3654 | |
| 3655 | return false; |
| 3656 | } |
| 3657 | |
| 3658 | /* Return true when TYPE should be 128bit aligned for 32bit argument |
| 3659 | passing ABI. XXX: This function is obsolete and is only used for |
| 3660 | checking psABI compatibility with previous versions of GCC. */ |
| 3661 | |
| 3662 | static bool |
| 3663 | ix86_compat_aligned_value_p (const_tree type) |
| 3664 | { |
| 3665 | machine_mode mode = TYPE_MODE (type); |
| 3666 | if (((TARGET_SSE && SSE_REG_MODE_P (mode)) |
| 3667 | || mode == TDmode |
| 3668 | || mode == TFmode |
| 3669 | || mode == TCmode) |
| 3670 | && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) |
| 3671 | return true; |
| 3672 | if (TYPE_ALIGN (type) < 128) |
| 3673 | return false; |
| 3674 | |
| 3675 | if (AGGREGATE_TYPE_P (type)) |
| 3676 | { |
| 3677 | /* Walk the aggregates recursively. */ |
| 3678 | switch (TREE_CODE (type)) |
| 3679 | { |
| 3680 | case RECORD_TYPE: |
| 3681 | case UNION_TYPE: |
| 3682 | case QUAL_UNION_TYPE: |
| 3683 | { |
| 3684 | tree field; |
| 3685 | |
| 3686 | /* Walk all the structure fields. */ |
| 3687 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| 3688 | { |
| 3689 | if (TREE_CODE (field) == FIELD_DECL |
| 3690 | && ix86_compat_aligned_value_p (TREE_TYPE (field))) |
| 3691 | return true; |
| 3692 | } |
| 3693 | break; |
| 3694 | } |
| 3695 | |
| 3696 | case ARRAY_TYPE: |
| 3697 | /* Just for use if some languages passes arrays by value. */ |
| 3698 | if (ix86_compat_aligned_value_p (TREE_TYPE (type))) |
| 3699 | return true; |
| 3700 | break; |
| 3701 | |
| 3702 | default: |
| 3703 | gcc_unreachable (); |
| 3704 | } |
| 3705 | } |
| 3706 | return false; |
| 3707 | } |
| 3708 | |
| 3709 | /* Return the alignment boundary for MODE and TYPE with alignment ALIGN. |
| 3710 | XXX: This function is obsolete and is only used for checking psABI |
| 3711 | compatibility with previous versions of GCC. */ |
| 3712 | |
| 3713 | static unsigned int |
| 3714 | ix86_compat_function_arg_boundary (machine_mode mode, |
| 3715 | const_tree type, unsigned int align) |
| 3716 | { |
| 3717 | /* In 32bit, only _Decimal128 and __float128 are aligned to their |
| 3718 | natural boundaries. */ |
| 3719 | if (!TARGET_64BIT && mode != TDmode && mode != TFmode) |
| 3720 | { |
| 3721 | /* i386 ABI defines all arguments to be 4 byte aligned. We have to |
| 3722 | make an exception for SSE modes since these require 128bit |
| 3723 | alignment. |
| 3724 | |
| 3725 | The handling here differs from field_alignment. ICC aligns MMX |
| 3726 | arguments to 4 byte boundaries, while structure fields are aligned |
| 3727 | to 8 byte boundaries. */ |
| 3728 | if (!type) |
| 3729 | { |
| 3730 | if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) |
| 3731 | align = PARM_BOUNDARY; |
| 3732 | } |
| 3733 | else |
| 3734 | { |
| 3735 | if (!ix86_compat_aligned_value_p (type)) |
| 3736 | align = PARM_BOUNDARY; |
| 3737 | } |
| 3738 | } |
| 3739 | if (align > BIGGEST_ALIGNMENT) |
| 3740 | align = BIGGEST_ALIGNMENT; |
| 3741 | return align; |
| 3742 | } |
| 3743 | |
| 3744 | /* Return true when TYPE should be 128bit aligned for 32bit argument |
| 3745 | passing ABI. */ |
| 3746 | |
| 3747 | static bool |
| 3748 | ix86_contains_aligned_value_p (const_tree type) |
| 3749 | { |
| 3750 | machine_mode mode = TYPE_MODE (type); |
| 3751 | |
| 3752 | if (mode == XFmode || mode == XCmode) |
| 3753 | return false; |
| 3754 | |
| 3755 | if (TYPE_ALIGN (type) < 128) |
| 3756 | return false; |
| 3757 | |
| 3758 | if (AGGREGATE_TYPE_P (type)) |
| 3759 | { |
| 3760 | /* Walk the aggregates recursively. */ |
| 3761 | switch (TREE_CODE (type)) |
| 3762 | { |
| 3763 | case RECORD_TYPE: |
| 3764 | case UNION_TYPE: |
| 3765 | case QUAL_UNION_TYPE: |
| 3766 | { |
| 3767 | tree field; |
| 3768 | |
| 3769 | /* Walk all the structure fields. */ |
| 3770 | for (field = TYPE_FIELDS (type); |
| 3771 | field; |
| 3772 | field = DECL_CHAIN (field)) |
| 3773 | { |
| 3774 | if (TREE_CODE (field) == FIELD_DECL |
| 3775 | && ix86_contains_aligned_value_p (TREE_TYPE (field))) |
| 3776 | return true; |
| 3777 | } |
| 3778 | break; |
| 3779 | } |
| 3780 | |
| 3781 | case ARRAY_TYPE: |
| 3782 | /* Just for use if some languages passes arrays by value. */ |
| 3783 | if (ix86_contains_aligned_value_p (TREE_TYPE (type))) |
| 3784 | return true; |
| 3785 | break; |
| 3786 | |
| 3787 | default: |
| 3788 | gcc_unreachable (); |
| 3789 | } |
| 3790 | } |
| 3791 | else |
| 3792 | return TYPE_ALIGN (type) >= 128; |
| 3793 | |
| 3794 | return false; |
| 3795 | } |
| 3796 | |
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      /* Empty types occupy no argument space; use minimum alignment.  */
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  /* The 32-bit ABI never over-aligns arguments below 128 bits.  */
	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      /* With -Wpsabi, warn (once per compilation) when the alignment
	 differs from what GCC versions before 4.6 would have used.  */
      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 align: saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
| 3852 | |
| 3853 | /* Return true if N is a possible register number of function value. */ |
| 3854 | |
| 3855 | static bool |
| 3856 | ix86_function_value_regno_p (const unsigned int regno) |
| 3857 | { |
| 3858 | switch (regno) |
| 3859 | { |
| 3860 | case AX_REG: |
| 3861 | return true; |
| 3862 | case DX_REG: |
| 3863 | return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); |
| 3864 | case DI_REG: |
| 3865 | case SI_REG: |
| 3866 | return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; |
| 3867 | |
| 3868 | /* Complex values are returned in %st(0)/%st(1) pair. */ |
| 3869 | case ST0_REG: |
| 3870 | case ST1_REG: |
| 3871 | /* TODO: The function should depend on current function ABI but |
| 3872 | builtins.cc would need updating then. Therefore we use the |
| 3873 | default ABI. */ |
| 3874 | if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) |
| 3875 | return false; |
| 3876 | return TARGET_FLOAT_RETURNS_IN_80387; |
| 3877 | |
| 3878 | /* Complex values are returned in %xmm0/%xmm1 pair. */ |
| 3879 | case XMM0_REG: |
| 3880 | case XMM1_REG: |
| 3881 | return TARGET_SSE; |
| 3882 | |
| 3883 | case MM0_REG: |
| 3884 | if (TARGET_MACHO || TARGET_64BIT) |
| 3885 | return false; |
| 3886 | return TARGET_MMX; |
| 3887 | } |
| 3888 | |
| 3889 | return false; |
| 3890 | } |
| 3891 | |
| 3892 | /* Check whether the register REGNO should be zeroed on X86. |
| 3893 | When ALL_SSE_ZEROED is true, all SSE registers have been zeroed |
| 3894 | together, no need to zero it again. |
| 3895 | When NEED_ZERO_MMX is true, MMX registers should be cleared. */ |
| 3896 | |
| 3897 | static bool |
| 3898 | zero_call_used_regno_p (const unsigned int regno, |
| 3899 | bool all_sse_zeroed, |
| 3900 | bool need_zero_mmx) |
| 3901 | { |
| 3902 | return GENERAL_REGNO_P (regno) |
| 3903 | || (!all_sse_zeroed && SSE_REGNO_P (regno)) |
| 3904 | || MASK_REGNO_P (regno) |
| 3905 | || (need_zero_mmx && MMX_REGNO_P (regno)); |
| 3906 | } |
| 3907 | |
| 3908 | /* Return the machine_mode that is used to zero register REGNO. */ |
| 3909 | |
| 3910 | static machine_mode |
| 3911 | zero_call_used_regno_mode (const unsigned int regno) |
| 3912 | { |
| 3913 | /* NB: We only need to zero the lower 32 bits for integer registers |
| 3914 | and the lower 128 bits for vector registers since destination are |
| 3915 | zero-extended to the full register width. */ |
| 3916 | if (GENERAL_REGNO_P (regno)) |
| 3917 | return SImode; |
| 3918 | else if (SSE_REGNO_P (regno)) |
| 3919 | return V4SFmode; |
| 3920 | else if (MASK_REGNO_P (regno)) |
| 3921 | return HImode; |
| 3922 | else if (MMX_REGNO_P (regno)) |
| 3923 | return V2SImode; |
| 3924 | else |
| 3925 | gcc_unreachable (); |
| 3926 | } |
| 3927 | |
/* Generate a rtx to zero all vector registers together if possible,
   otherwise, return NULL.  NEED_ZEROED_HARDREGS is the set of registers
   requested to be zeroed; vzeroall is only usable when every vector
   register available on this target is in that set.  */

static rtx
zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
{
  /* vzeroall is an AVX instruction.  */
  if (!TARGET_AVX)
    return NULL;

  /* If any available vector register is *not* requested to be zeroed,
     a blanket vzeroall would clobber too much; bail out.  */
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((LEGACY_SSE_REGNO_P (regno)
	 || (TARGET_64BIT
	     && (REX_SSE_REGNO_P (regno)
		 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
	&& !TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno))
      return NULL;

  return gen_avx_vzeroall ();
}
| 3947 | |
/* Generate insns to zero all st registers together.
   Return the number of st registers cleared (0 when nothing is done).
   Assume the number of st registers that are zeroed is num_of_st,
   we will emit the following sequence to zero them together:
		  fldz;		\
		  fldz;		\
		  ...
		  fldz;		\
		  fstp %%st(0);	\
		  fstp %%st(0);	\
		  ...
		  fstp %%st(0);
   i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
   mark stack slots empty.

   How to compute the num_of_st:
   There is no direct mapping from stack registers to hard register
   numbers.  If one stack register needs to be cleared, we don't know
   where in the stack the value remains.  So, if any stack register
   needs to be cleared, the whole stack should be cleared.  However,
   x87 stack registers that hold the return value should be excluded.
   x87 returns in the top (two for complex values) register, so
   num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
   return the value of num_of_st.  */


static int
zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
{
  /* If the FPU is disabled, no need to zero all st registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    return 0;

  /* num_of_st is used here only as a flag: does any x87/MMX register
     need clearing at all?  */
  unsigned int num_of_st = 0;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
	&& TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno))
      {
	num_of_st++;
	break;
      }

  if (num_of_st == 0)
    return 0;

  /* Does the function return its value in an x87 stack register?  */
  bool return_with_x87 = false;
  return_with_x87 = (crtl->return_rtx
		     && (STACK_REG_P (crtl->return_rtx)));

  /* A complex return value occupies the top two stack slots.  */
  bool complex_return = false;
  complex_return = (crtl->return_rtx
		    && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));

  /* Exclude the slot(s) holding the return value from clearing.  */
  if (return_with_x87)
    if (complex_return)
      num_of_st = 6;
    else
      num_of_st = 7;
  else
    num_of_st = 8;

  /* Push num_of_st zeros ...  */
  rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
  for (unsigned int i = 0; i < num_of_st; i++)
    emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));

  /* ... then pop them again so the slots are marked empty.  */
  for (unsigned int i = 0; i < num_of_st; i++)
    {
      rtx insn;
      insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
      add_reg_note (insn, REG_DEAD, st_reg);
    }
  return num_of_st;
}
| 4022 | |
| 4023 | |
| 4024 | /* When the routine exit in MMX mode, if any ST register needs |
| 4025 | to be zeroed, we should clear all MMX registers except the |
| 4026 | RET_MMX_REGNO that holds the return value. */ |
| 4027 | static bool |
| 4028 | zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs, |
| 4029 | unsigned int ret_mmx_regno) |
| 4030 | { |
| 4031 | bool need_zero_all_mm = false; |
| 4032 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 4033 | if (STACK_REGNO_P (regno) |
| 4034 | && TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
| 4035 | { |
| 4036 | need_zero_all_mm = true; |
| 4037 | break; |
| 4038 | } |
| 4039 | |
| 4040 | if (!need_zero_all_mm) |
| 4041 | return false; |
| 4042 | |
| 4043 | machine_mode mode = V2SImode; |
| 4044 | for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++) |
| 4045 | if (regno != ret_mmx_regno) |
| 4046 | { |
| 4047 | rtx reg = gen_rtx_REG (mode, regno); |
| 4048 | emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode))); |
| 4049 | } |
| 4050 | return true; |
| 4051 | } |
| 4052 | |
/* TARGET_ZERO_CALL_USED_REGS.  */
/* Generate a sequence of instructions that zero registers specified by
   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
   zeroed.  */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (set&: zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
    }

  /* mm/st registers are shared registers set, we should follow the following
     rules to clear them:
			MMX exit mode	      x87 exit mode
	-------------|----------------------|---------------
	uses x87 reg | clear all MMX	    | clear all x87
	uses MMX reg | clear individual MMX | clear all x87
	x87 + MMX    | clear all MMX	    | clear all x87

     first, we should decide which mode (MMX mode or x87 mode) the function
     exit with.  */

  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      if (all_st_zeroed_num > 0)
	for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
	  /* x87 stack registers that hold the return value should be excluded.
	     x87 returns in the top (two for complex values) register.  */
	  if (all_st_zeroed_num == 8
	      || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
		   || (all_st_zeroed_num == 6
		       && (regno == (REGNO (crtl->return_rtx) + 1)))))
	    SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     ret_mmx_regno: exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno))
	continue;
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   need_zero_mmx: exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);

      /* Zeroing this narrow mode clears the full register because the
	 destination is zero-extended (see zero_call_used_regno_mode).  */
      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
	{
	case E_SImode:
	  /* Unless mov $0 is preferred, zero via xor, which clobbers the
	     flags register, so add the clobber explicitly.  */
	  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
	    {
	      rtx clob = gen_rtx_CLOBBER (VOIDmode,
					  gen_rtx_REG (CCmode,
						       FLAGS_REG));
	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							   tmp,
							   clob));
	    }
	  /* FALLTHRU.  */

	case E_V4SFmode:
	case E_HImode:
	case E_V2SImode:
	  emit_insn (tmp);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return zeroed_hardregs;
}
| 4159 | |
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Return __bf16/_Float16/_Complex _Float16 by sse register.  */
  if (mode == HFmode || mode == BFmode)
    {
      if (!TARGET_SSE2)
	{
	  error ("SSE register return with SSE2 disabled");
	  regno = AX_REG;
	}
      else
	regno = FIRST_SSE_REG;
    }

  if (mode == HCmode)
    {
      if (!TARGET_SSE2)
	error ("SSE register return with SSE2 disabled");

      /* Wrap the value as a single SImode chunk in %xmm0 (or %eax as a
	 fallback when SSE2 is disabled).  */
      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
      XVECEXP (ret, 0, 0)
	= gen_rtx_EXPR_LIST (VOIDmode,
			     gen_rtx_REG (SImode,
					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
			     GEN_INT (0));
      return ret;
    }

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (type: fntype, decl: fn, warn: false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled", fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called");
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
| 4247 | |
/* Return the rtx describing where a function result of MODE/ORIG_MODE
   and type VALTYPE is returned under the 64-bit SysV ABI.  VALTYPE may
   be NULL for libcalls.  */

static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case E_BFmode:
	case E_HFmode:
	case E_HCmode:
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case E_TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, type: valtype, in_return: 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     intreg: x86_64_int_return_registers, sse_regno: 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
| 4303 | |
| 4304 | static rtx |
| 4305 | function_value_ms_32 (machine_mode orig_mode, machine_mode mode, |
| 4306 | const_tree fntype, const_tree fn, const_tree valtype) |
| 4307 | { |
| 4308 | unsigned int regno; |
| 4309 | |
| 4310 | /* Floating point return values in %st(0) |
| 4311 | (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ |
| 4312 | if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 |
| 4313 | && (GET_MODE_SIZE (mode) > 8 |
| 4314 | || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) |
| 4315 | { |
| 4316 | regno = FIRST_FLOAT_REG; |
| 4317 | return gen_rtx_REG (orig_mode, regno); |
| 4318 | } |
| 4319 | else |
| 4320 | return function_value_32(orig_mode, mode, fntype,fn); |
| 4321 | } |
| 4322 | |
| 4323 | static rtx |
| 4324 | function_value_ms_64 (machine_mode orig_mode, machine_mode mode, |
| 4325 | const_tree valtype) |
| 4326 | { |
| 4327 | unsigned int regno = AX_REG; |
| 4328 | |
| 4329 | if (TARGET_SSE) |
| 4330 | { |
| 4331 | switch (GET_MODE_SIZE (mode)) |
| 4332 | { |
| 4333 | case 16: |
| 4334 | if (valtype != NULL_TREE |
| 4335 | && !VECTOR_INTEGER_TYPE_P (valtype) |
| 4336 | && !INTEGRAL_TYPE_P (valtype) |
| 4337 | && !VECTOR_FLOAT_TYPE_P (valtype)) |
| 4338 | break; |
| 4339 | if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) |
| 4340 | && !COMPLEX_MODE_P (mode)) |
| 4341 | regno = FIRST_SSE_REG; |
| 4342 | break; |
| 4343 | case 8: |
| 4344 | case 4: |
| 4345 | case 2: |
| 4346 | if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) |
| 4347 | break; |
| 4348 | if (mode == HFmode || mode == SFmode || mode == DFmode) |
| 4349 | regno = FIRST_SSE_REG; |
| 4350 | break; |
| 4351 | default: |
| 4352 | break; |
| 4353 | } |
| 4354 | } |
| 4355 | return gen_rtx_REG (orig_mode, regno); |
| 4356 | } |
| 4357 | |
| 4358 | static rtx |
| 4359 | ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, |
| 4360 | machine_mode orig_mode, machine_mode mode) |
| 4361 | { |
| 4362 | const_tree fn, fntype; |
| 4363 | |
| 4364 | fn = NULL_TREE; |
| 4365 | if (fntype_or_decl && DECL_P (fntype_or_decl)) |
| 4366 | fn = fntype_or_decl; |
| 4367 | fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; |
| 4368 | |
| 4369 | if (ix86_function_type_abi (fntype) == MS_ABI) |
| 4370 | { |
| 4371 | if (TARGET_64BIT) |
| 4372 | return function_value_ms_64 (orig_mode, mode, valtype); |
| 4373 | else |
| 4374 | return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); |
| 4375 | } |
| 4376 | else if (TARGET_64BIT) |
| 4377 | return function_value_64 (orig_mode, mode, valtype); |
| 4378 | else |
| 4379 | return function_value_32 (orig_mode, mode, fntype, fn); |
| 4380 | } |
| 4381 | |
| 4382 | static rtx |
| 4383 | ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) |
| 4384 | { |
| 4385 | machine_mode mode, orig_mode; |
| 4386 | |
| 4387 | orig_mode = TYPE_MODE (valtype); |
| 4388 | mode = type_natural_mode (type: valtype, NULL, in_return: true); |
| 4389 | return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); |
| 4390 | } |
| 4391 | |
| 4392 | /* Pointer function arguments and return values are promoted to |
| 4393 | word_mode for normal functions. */ |
| 4394 | |
| 4395 | static machine_mode |
| 4396 | ix86_promote_function_mode (const_tree type, machine_mode mode, |
| 4397 | int *punsignedp, const_tree fntype, |
| 4398 | int for_return) |
| 4399 | { |
| 4400 | if (cfun->machine->func_type == TYPE_NORMAL |
| 4401 | && type != NULL_TREE |
| 4402 | && POINTER_TYPE_P (type)) |
| 4403 | { |
| 4404 | *punsignedp = POINTERS_EXTEND_UNSIGNED; |
| 4405 | return word_mode; |
| 4406 | } |
| 4407 | return default_promote_function_mode (type, mode, punsignedp, fntype, |
| 4408 | for_return); |
| 4409 | } |
| 4410 | |
| 4411 | /* Return true if a structure, union or array with MODE containing FIELD |
| 4412 | should be accessed using BLKmode. */ |
| 4413 | |
| 4414 | static bool |
| 4415 | ix86_member_type_forces_blk (const_tree field, machine_mode mode) |
| 4416 | { |
| 4417 | /* Union with XFmode must be in BLKmode. */ |
| 4418 | return (mode == XFmode |
| 4419 | && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE |
| 4420 | || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); |
| 4421 | } |
| 4422 | |
| 4423 | rtx |
| 4424 | ix86_libcall_value (machine_mode mode) |
| 4425 | { |
| 4426 | return ix86_function_value_1 (NULL, NULL, orig_mode: mode, mode); |
| 4427 | } |
| 4428 | |
/* Return true iff type is returned in memory.  Implements the
   TARGET_RETURN_IN_MEMORY hook for all ix86 ABIs: 64-bit MS,
   64-bit SysV, Intel MCU, and generic 32-bit.  */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, in_return: true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  int needed_intregs, needed_sseregs;

	  /* SysV: let the psABI classifier decide; register counts
	     are computed but not needed here.  */
	  return examine_argument (mode, type, in_return: 1,
				   int_nregs: &needed_intregs, sse_nregs: &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      /* -mms-bitfields style small-aggregate return in registers.  */
      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */

	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F;
	}

      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
| 4515 | |
| 4516 | /* Implement TARGET_PUSH_ARGUMENT. */ |
| 4517 | |
| 4518 | static bool |
| 4519 | ix86_push_argument (unsigned int npush) |
| 4520 | { |
| 4521 | /* If SSE2 is available, use vector move to put large argument onto |
| 4522 | stack. NB: In 32-bit mode, use 8-byte vector move. */ |
| 4523 | return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8)) |
| 4524 | && TARGET_PUSH_ARGS |
| 4525 | && !ACCUMULATE_OUTGOING_ARGS); |
| 4526 | } |
| 4527 | |
| 4528 | |
| 4529 | /* Create the va_list data type. */ |
| 4530 | |
| 4531 | static tree |
| 4532 | ix86_build_builtin_va_list_64 (void) |
| 4533 | { |
| 4534 | tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; |
| 4535 | |
| 4536 | record = lang_hooks.types.make_type (RECORD_TYPE); |
| 4537 | type_decl = build_decl (BUILTINS_LOCATION, |
| 4538 | TYPE_DECL, get_identifier ("__va_list_tag" ), record); |
| 4539 | |
| 4540 | f_gpr = build_decl (BUILTINS_LOCATION, |
| 4541 | FIELD_DECL, get_identifier ("gp_offset" ), |
| 4542 | unsigned_type_node); |
| 4543 | f_fpr = build_decl (BUILTINS_LOCATION, |
| 4544 | FIELD_DECL, get_identifier ("fp_offset" ), |
| 4545 | unsigned_type_node); |
| 4546 | f_ovf = build_decl (BUILTINS_LOCATION, |
| 4547 | FIELD_DECL, get_identifier ("overflow_arg_area" ), |
| 4548 | ptr_type_node); |
| 4549 | f_sav = build_decl (BUILTINS_LOCATION, |
| 4550 | FIELD_DECL, get_identifier ("reg_save_area" ), |
| 4551 | ptr_type_node); |
| 4552 | |
| 4553 | va_list_gpr_counter_field = f_gpr; |
| 4554 | va_list_fpr_counter_field = f_fpr; |
| 4555 | |
| 4556 | DECL_FIELD_CONTEXT (f_gpr) = record; |
| 4557 | DECL_FIELD_CONTEXT (f_fpr) = record; |
| 4558 | DECL_FIELD_CONTEXT (f_ovf) = record; |
| 4559 | DECL_FIELD_CONTEXT (f_sav) = record; |
| 4560 | |
| 4561 | TYPE_STUB_DECL (record) = type_decl; |
| 4562 | TYPE_NAME (record) = type_decl; |
| 4563 | TYPE_FIELDS (record) = f_gpr; |
| 4564 | DECL_CHAIN (f_gpr) = f_fpr; |
| 4565 | DECL_CHAIN (f_fpr) = f_ovf; |
| 4566 | DECL_CHAIN (f_ovf) = f_sav; |
| 4567 | |
| 4568 | layout_type (record); |
| 4569 | |
| 4570 | TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list" ), |
| 4571 | NULL_TREE, TYPE_ATTRIBUTES (record)); |
| 4572 | |
| 4573 | /* The correct type is an array type of one element. */ |
| 4574 | return build_array_type (record, build_index_type (size_zero_node)); |
| 4575 | } |
| 4576 | |
| 4577 | /* Setup the builtin va_list data type and for 64-bit the additional |
| 4578 | calling convention specific va_list data types. */ |
| 4579 | |
| 4580 | static tree |
| 4581 | ix86_build_builtin_va_list (void) |
| 4582 | { |
| 4583 | if (TARGET_64BIT) |
| 4584 | { |
| 4585 | /* Initialize ABI specific va_list builtin types. |
| 4586 | |
| 4587 | In lto1, we can encounter two va_list types: |
| 4588 | - one as a result of the type-merge across TUs, and |
| 4589 | - the one constructed here. |
| 4590 | These two types will not have the same TYPE_MAIN_VARIANT, and therefore |
| 4591 | a type identity check in canonical_va_list_type based on |
| 4592 | TYPE_MAIN_VARIANT (which we used to have) will not work. |
| 4593 | Instead, we tag each va_list_type_node with its unique attribute, and |
| 4594 | look for the attribute in the type identity check in |
| 4595 | canonical_va_list_type. |
| 4596 | |
| 4597 | Tagging sysv_va_list_type_node directly with the attribute is |
| 4598 | problematic since it's a array of one record, which will degrade into a |
| 4599 | pointer to record when used as parameter (see build_va_arg comments for |
| 4600 | an example), dropping the attribute in the process. So we tag the |
| 4601 | record instead. */ |
| 4602 | |
| 4603 | /* For SYSV_ABI we use an array of one record. */ |
| 4604 | sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); |
| 4605 | |
| 4606 | /* For MS_ABI we use plain pointer to argument area. */ |
| 4607 | tree char_ptr_type = build_pointer_type (char_type_node); |
| 4608 | tree attr = tree_cons (get_identifier ("ms_abi va_list" ), NULL_TREE, |
| 4609 | TYPE_ATTRIBUTES (char_ptr_type)); |
| 4610 | ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); |
| 4611 | |
| 4612 | return ((ix86_abi == MS_ABI) |
| 4613 | ? ms_va_list_type_node |
| 4614 | : sysv_va_list_type_node); |
| 4615 | } |
| 4616 | else |
| 4617 | { |
| 4618 | /* For i386 we use plain pointer to argument area. */ |
| 4619 | return build_pointer_type (char_type_node); |
| 4620 | } |
| 4621 | } |
| 4622 | |
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  SysV 64-bit
   variant: spill the still-unused argument registers into the
   register save area so va_arg can later read unnamed arguments
   from it.  CUM describes how many registers the named arguments
   already consumed.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Spill only as many GP registers as the function might actually
     read back, bounded by cfun->va_list_gpr_size.  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  /* preserve_none functions use a different GP parameter register
     ordering.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode, parm_regs[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      /* The SSE slots sit directly above the GP slots in the save
	 area, 16 bytes apiece.  */
      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
| 4713 | |
| 4714 | static void |
| 4715 | setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) |
| 4716 | { |
| 4717 | alias_set_type set = get_varargs_alias_set (); |
| 4718 | int i; |
| 4719 | |
| 4720 | /* Reset to zero, as there might be a sysv vaarg used |
| 4721 | before. */ |
| 4722 | ix86_varargs_gpr_size = 0; |
| 4723 | ix86_varargs_fpr_size = 0; |
| 4724 | |
| 4725 | for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) |
| 4726 | { |
| 4727 | rtx reg, mem; |
| 4728 | |
| 4729 | mem = gen_rtx_MEM (Pmode, |
| 4730 | plus_constant (Pmode, virtual_incoming_args_rtx, |
| 4731 | i * UNITS_PER_WORD)); |
| 4732 | MEM_NOTRAP_P (mem) = 1; |
| 4733 | set_mem_alias_set (mem, set); |
| 4734 | |
| 4735 | reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); |
| 4736 | emit_move_insn (mem, reg); |
| 4737 | } |
| 4738 | } |
| 4739 | |
| 4740 | static void |
| 4741 | ix86_setup_incoming_varargs (cumulative_args_t cum_v, |
| 4742 | const function_arg_info &arg, |
| 4743 | int *, int no_rtl) |
| 4744 | { |
| 4745 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
| 4746 | CUMULATIVE_ARGS next_cum; |
| 4747 | tree fntype; |
| 4748 | |
| 4749 | /* This argument doesn't appear to be used anymore. Which is good, |
| 4750 | because the old code here didn't suppress rtl generation. */ |
| 4751 | gcc_assert (!no_rtl); |
| 4752 | |
| 4753 | if (!TARGET_64BIT) |
| 4754 | return; |
| 4755 | |
| 4756 | fntype = TREE_TYPE (current_function_decl); |
| 4757 | |
| 4758 | /* For varargs, we do not want to skip the dummy va_dcl argument. |
| 4759 | For stdargs, we do want to skip the last named argument. */ |
| 4760 | next_cum = *cum; |
| 4761 | if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) |
| 4762 | || arg.type != NULL_TREE) |
| 4763 | && stdarg_p (fntype)) |
| 4764 | ix86_function_arg_advance (cum_v: pack_cumulative_args (arg: &next_cum), arg); |
| 4765 | |
| 4766 | if (cum->call_abi == MS_ABI) |
| 4767 | setup_incoming_varargs_ms_64 (&next_cum); |
| 4768 | else |
| 4769 | setup_incoming_varargs_64 (&next_cum); |
| 4770 | } |
| 4771 | |
| 4772 | /* Checks if TYPE is of kind va_list char *. */ |
| 4773 | |
| 4774 | static bool |
| 4775 | is_va_list_char_pointer (tree type) |
| 4776 | { |
| 4777 | tree canonic; |
| 4778 | |
| 4779 | /* For 32-bit it is always true. */ |
| 4780 | if (!TARGET_64BIT) |
| 4781 | return true; |
| 4782 | canonic = ix86_canonical_va_list_type (type); |
| 4783 | return (canonic == ms_va_list_type_node |
| 4784 | || (ix86_abi == MS_ABI && canonic == va_list_type_node)); |
| 4785 | } |
| 4786 | |
/* Implement va_start.  VALIST is the va_list expression to initialize;
   NEXTARG points at the first unnamed stack argument.  For the SysV
   64-bit ABI this fills in all four __va_list_tag fields; other ABIs
   use the standard char-pointer expansion.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  /* Emit the copy at the very start of the function so it
	     dominates every use of the pseudo.  */
	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  /* Same as the standard expansion, but based off the
	     split-stack varargs pointer instead of the incoming
	     argument pointer.  */
	  rtx va_r, next;

	  va_r = expand_expr (exp: valist, NULL_RTX, VOIDmode, modifier: EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  /* SysV 64-bit: pick apart the four __va_list_tag fields.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  /* gp_offset = n_gpr * 8 (bytes into the register save area).  */
  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }

  /* fp_offset starts past all the GP slots, 16 bytes per SSE reg.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }
}
| 4917 | |
/* Implement va_arg.  Gimplify reading a value of TYPE from the
   va_list VALIST: try the register save area first (guarded by the
   gp_offset/fp_offset cursors), falling back to the stack overflow
   area.  New statements go to PRE_P/POST_P.  Returns the gimplified
   rvalue expression.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* The four __va_list_tag fields, in declaration order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);

  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* By-reference arguments: fetch the pointer, dereference at the end.  */
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, in_return: false);
  switch (nat_mode)
    {
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (mode: nat_mode, TYPE_MODE (type),
				       type, in_return: 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       sse_regno: 0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr" );
  type_align = TYPE_ALIGN (type);

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (mode: nat_mode, type, in_return: 0, int_nregs: &needed_intregs, sse_nregs: &needed_sseregs);

      bool container_in_reg = false;
      if (REG_P (container))
	container_in_reg = true;
      else if (GET_CODE (container) == PARALLEL
	       && GET_MODE (container) == BLKmode
	       && XVECLEN (container, 0) == 1)
	{
	  /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
	     expression in a TImode register.  In this case, temp isn't
	     needed.  Otherwise, the TImode variable will be put in the
	     GPR save area which guarantees only 8-byte alignment.  */
	  rtx x = XVECEXP (container, 0, 0);
	  if (GET_CODE (x) == EXPR_LIST
	      && REG_P (XEXP (x, 0))
	      && XEXP (x, 1) == const0_rtx)
	    container_in_reg = true;
	}

      /* A temporary is needed when the value's required alignment
	 exceeds what the save area guarantees.  */
      need_temp = (!container_in_reg
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !container_in_reg)
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr" );
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr" );
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  /* If gp_offset is past the last slot that still fits
	     needed_intregs registers, jump to the stack path.  */
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* Likewise for fp_offset and the SSE part of the save area.  */
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  /* Copy the value piecewise out of the save area into an
	     adequately aligned temporary, then yield its address.  */
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp" );
	  TREE_ADDRESSABLE (temp) = 1;

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      /* Trailing piece may be narrower than a full register.  */
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (size: nbits, limit: 1).exists (mode: &mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      /* GP slots are 8 bytes apart, SSE slots 16.  */
	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  /* Partial piece: copy the exact byte count.  */
		  tree copy
		    = build_call_expr (builtin_decl_implicit (fncode: BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      /* Advance the register cursors past the consumed slots.  */
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	  /* The GPR save area guarantees only 8-byte alignment.  */
	  if (!need_temp)
	    type_align = MIN (type_align, 64);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (dest: lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (label: lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      /* Round the overflow pointer up to arg_boundary bits.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Bump the overflow pointer past the consumed words.  */
  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (label: lab_over));

  /* Dereference ADDR with the (possibly reduced) alignment computed
     above; a second dereference unwraps by-reference arguments.  */
  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
| 5242 | |
| 5243 | /* Return true if OPNUM's MEM should be matched |
| 5244 | in movabs* patterns. */ |
| 5245 | |
| 5246 | bool |
| 5247 | ix86_check_movabs (rtx insn, int opnum) |
| 5248 | { |
| 5249 | rtx set, mem; |
| 5250 | |
| 5251 | set = PATTERN (insn); |
| 5252 | if (GET_CODE (set) == PARALLEL) |
| 5253 | set = XVECEXP (set, 0, 0); |
| 5254 | gcc_assert (GET_CODE (set) == SET); |
| 5255 | mem = XEXP (set, opnum); |
| 5256 | while (SUBREG_P (mem)) |
| 5257 | mem = SUBREG_REG (mem); |
| 5258 | gcc_assert (MEM_P (mem)); |
| 5259 | return volatile_ok || !MEM_VOLATILE_P (mem); |
| 5260 | } |
| 5261 | |
| 5262 | /* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */ |
| 5263 | bool |
| 5264 | ix86_check_movs (rtx insn, int idx) |
| 5265 | { |
| 5266 | rtx pat = PATTERN (insn); |
| 5267 | gcc_assert (GET_CODE (pat) == PARALLEL); |
| 5268 | |
| 5269 | rtx set = XVECEXP (pat, 0, idx); |
| 5270 | gcc_assert (GET_CODE (set) == SET); |
| 5271 | |
| 5272 | rtx dst = SET_DEST (set); |
| 5273 | gcc_assert (MEM_P (dst)); |
| 5274 | |
| 5275 | rtx src = SET_SRC (set); |
| 5276 | gcc_assert (MEM_P (src)); |
| 5277 | |
| 5278 | return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)) |
| 5279 | && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)) |
| 5280 | || Pmode == word_mode)); |
| 5281 | } |
| 5282 | |
| 5283 | /* Return false if INSN contains a MEM with a non-default address space. */ |
| 5284 | bool |
| 5285 | ix86_check_no_addr_space (rtx insn) |
| 5286 | { |
| 5287 | subrtx_var_iterator::array_type array; |
| 5288 | FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) |
| 5289 | { |
| 5290 | rtx x = *iter; |
| 5291 | if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) |
| 5292 | return false; |
| 5293 | } |
| 5294 | return true; |
| 5295 | } |
| 5296 | |
| 5297 | /* Initialize the table of extra 80387 mathematical constants. */ |
| 5298 | |
| 5299 | static void |
| 5300 | init_ext_80387_constants (void) |
| 5301 | { |
| 5302 | static const char * cst[5] = |
| 5303 | { |
| 5304 | "0.3010299956639811952256464283594894482" , /* 0: fldlg2 */ |
| 5305 | "0.6931471805599453094286904741849753009" , /* 1: fldln2 */ |
| 5306 | "1.4426950408889634073876517827983434472" , /* 2: fldl2e */ |
| 5307 | "3.3219280948873623478083405569094566090" , /* 3: fldl2t */ |
| 5308 | "3.1415926535897932385128089594061862044" , /* 4: fldpi */ |
| 5309 | }; |
| 5310 | int i; |
| 5311 | |
| 5312 | for (i = 0; i < 5; i++) |
| 5313 | { |
| 5314 | real_from_string (&ext_80387_constants_table[i], cst[i]); |
| 5315 | /* Ensure each constant is rounded to XFmode precision. */ |
| 5316 | real_convert (&ext_80387_constants_table[i], |
| 5317 | XFmode, &ext_80387_constants_table[i]); |
| 5318 | } |
| 5319 | |
| 5320 | ext_80387_constants_init = 1; |
| 5321 | } |
| 5322 | |
| 5323 | /* Return non-zero if the constant is something that |
| 5324 | can be loaded with a special instruction. */ |
| 5325 | |
| 5326 | int |
| 5327 | standard_80387_constant_p (rtx x) |
| 5328 | { |
| 5329 | machine_mode mode = GET_MODE (x); |
| 5330 | |
| 5331 | const REAL_VALUE_TYPE *r; |
| 5332 | |
| 5333 | if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) |
| 5334 | return -1; |
| 5335 | |
| 5336 | if (x == CONST0_RTX (mode)) |
| 5337 | return 1; |
| 5338 | if (x == CONST1_RTX (mode)) |
| 5339 | return 2; |
| 5340 | |
| 5341 | r = CONST_DOUBLE_REAL_VALUE (x); |
| 5342 | |
| 5343 | /* For XFmode constants, try to find a special 80387 instruction when |
| 5344 | optimizing for size or on those CPUs that benefit from them. */ |
| 5345 | if (mode == XFmode |
| 5346 | && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS) |
| 5347 | && !flag_rounding_math) |
| 5348 | { |
| 5349 | int i; |
| 5350 | |
| 5351 | if (! ext_80387_constants_init) |
| 5352 | init_ext_80387_constants (); |
| 5353 | |
| 5354 | for (i = 0; i < 5; i++) |
| 5355 | if (real_identical (r, &ext_80387_constants_table[i])) |
| 5356 | return i + 3; |
| 5357 | } |
| 5358 | |
| 5359 | /* Load of the constant -0.0 or -1.0 will be split as |
| 5360 | fldz;fchs or fld1;fchs sequence. */ |
| 5361 | if (real_isnegzero (r)) |
| 5362 | return 8; |
| 5363 | if (real_identical (r, &dconstm1)) |
| 5364 | return 9; |
| 5365 | |
| 5366 | return 0; |
| 5367 | } |
| 5368 | |
| 5369 | /* Return the opcode of the special instruction to be used to load |
| 5370 | the constant X. */ |
| 5371 | |
| 5372 | const char * |
| 5373 | standard_80387_constant_opcode (rtx x) |
| 5374 | { |
| 5375 | switch (standard_80387_constant_p (x)) |
| 5376 | { |
| 5377 | case 1: |
| 5378 | return "fldz" ; |
| 5379 | case 2: |
| 5380 | return "fld1" ; |
| 5381 | case 3: |
| 5382 | return "fldlg2" ; |
| 5383 | case 4: |
| 5384 | return "fldln2" ; |
| 5385 | case 5: |
| 5386 | return "fldl2e" ; |
| 5387 | case 6: |
| 5388 | return "fldl2t" ; |
| 5389 | case 7: |
| 5390 | return "fldpi" ; |
| 5391 | case 8: |
| 5392 | case 9: |
| 5393 | return "#" ; |
| 5394 | default: |
| 5395 | gcc_unreachable (); |
| 5396 | } |
| 5397 | } |
| 5398 | |
| 5399 | /* Return the CONST_DOUBLE representing the 80387 constant that is |
| 5400 | loaded by the specified special instruction. The argument IDX |
| 5401 | matches the return value from standard_80387_constant_p. */ |
| 5402 | |
| 5403 | rtx |
| 5404 | standard_80387_constant_rtx (int idx) |
| 5405 | { |
| 5406 | int i; |
| 5407 | |
| 5408 | if (! ext_80387_constants_init) |
| 5409 | init_ext_80387_constants (); |
| 5410 | |
| 5411 | switch (idx) |
| 5412 | { |
| 5413 | case 3: |
| 5414 | case 4: |
| 5415 | case 5: |
| 5416 | case 6: |
| 5417 | case 7: |
| 5418 | i = idx - 3; |
| 5419 | break; |
| 5420 | |
| 5421 | default: |
| 5422 | gcc_unreachable (); |
| 5423 | } |
| 5424 | |
| 5425 | return const_double_from_real_value (ext_80387_constants_table[i], |
| 5426 | XFmode); |
| 5427 | } |
| 5428 | |
| 5429 | /* Return 1 if X is all bits 0, 2 if X is all bits 1 |
| 5430 | and 3 if X is all bits 1 with zero extend |
| 5431 | in supported SSE/AVX vector mode. */ |
| 5432 | |
| 5433 | int |
| 5434 | standard_sse_constant_p (rtx x, machine_mode pred_mode) |
| 5435 | { |
| 5436 | machine_mode mode; |
| 5437 | |
| 5438 | if (!TARGET_SSE) |
| 5439 | return 0; |
| 5440 | |
| 5441 | mode = GET_MODE (x); |
| 5442 | |
| 5443 | if (x == const0_rtx || const0_operand (x, mode)) |
| 5444 | return 1; |
| 5445 | |
| 5446 | if (x == constm1_rtx |
| 5447 | || vector_all_ones_operand (x, mode) |
| 5448 | || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT |
| 5449 | || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT) |
| 5450 | && float_vector_all_ones_operand (x, mode))) |
| 5451 | { |
| 5452 | /* VOIDmode integer constant, get mode from the predicate. */ |
| 5453 | if (mode == VOIDmode) |
| 5454 | mode = pred_mode; |
| 5455 | |
| 5456 | switch (GET_MODE_SIZE (mode)) |
| 5457 | { |
| 5458 | case 64: |
| 5459 | if (TARGET_AVX512F) |
| 5460 | return 2; |
| 5461 | break; |
| 5462 | case 32: |
| 5463 | if (TARGET_AVX2) |
| 5464 | return 2; |
| 5465 | break; |
| 5466 | case 16: |
| 5467 | if (TARGET_SSE2) |
| 5468 | return 2; |
| 5469 | break; |
| 5470 | case 0: |
| 5471 | /* VOIDmode */ |
| 5472 | gcc_unreachable (); |
| 5473 | default: |
| 5474 | break; |
| 5475 | } |
| 5476 | } |
| 5477 | |
| 5478 | if (vector_all_ones_zero_extend_half_operand (x, mode) |
| 5479 | || vector_all_ones_zero_extend_quarter_operand (x, mode)) |
| 5480 | return 3; |
| 5481 | |
| 5482 | return 0; |
| 5483 | } |
| 5484 | |
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  operands[1] must be an
   all-zeros, all-ones, or zero-extended all-ones constant as
   classified by standard_sse_constant_p; anything else aborts.  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  /* All-zeros: clear the destination with a self-XOR.  The exact
     mnemonic depends on the insn's attr mode and on whether the
     destination is an EVEX-only register (xmm16+).  */
  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0";
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* Without AVX512VL, operate on the full zmm register.  */
	      if (TARGET_AVX512VL)
		return "vpxord\t%x0, %x0, %x0";
	      else
		return "vpxord\t%g0, %g0, %g0";
	    }
	  return "vpxor\t%x0, %x0, %x0";

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* EVEX vxorpd needs AVX512DQ; fall back to vpxorq.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorpd\t%x0, %x0, %x0";
		  else
		    return "vxorpd\t%g0, %g0, %g0";
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxorq\t%x0, %x0, %x0";
		  else
		    return "vpxorq\t%g0, %g0, %g0";
		}
	    }
	  return "vxorpd\t%x0, %x0, %x0";

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* EVEX vxorps needs AVX512DQ; fall back to vpxord.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorps\t%x0, %x0, %x0";
		  else
		    return "vxorps\t%g0, %g0, %g0";
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxord\t%x0, %x0, %x0";
		  else
		    return "vpxord\t%g0, %g0, %g0";
		}
	    }
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  gcc_unreachable ();
	}
    }
  /* All-ones: set every bit via vpternlogd (EVEX) or pcmpeqd.  */
  else if (x == constm1_rtx
	   || vector_all_ones_operand (x, mode)
	   || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	       && float_vector_all_ones_operand (x, mode)))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F);
	  /* 0xFF ternary-logic immediate yields all-ones.  */
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
	      else
		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
	    }
	  /* pcmpeqd reg,reg compares equal everywhere -> all-ones.  */
	  return (TARGET_AVX
		  ? "vpcmpeqd\t%0, %0, %0"
		  : "pcmpeqd\t%0, %0");

	default:
	  gcc_unreachable ();
	}
   }
  /* All-ones in the low half, zero-extended: set only the lower part.  */
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      if (GET_MODE_SIZE (mode) == 64)
	{
	  gcc_assert (TARGET_AVX512F);
	  return "vpcmpeqd\t%t0, %t0, %t0";
	}
      else if (GET_MODE_SIZE (mode) == 32)
	{
	  gcc_assert (TARGET_AVX);
	  return "vpcmpeqd\t%x0, %x0, %x0";
	}
      gcc_unreachable ();
    }
  /* All-ones in the low quarter, zero-extended.  */
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      gcc_assert (TARGET_AVX512F);
      return "vpcmpeqd\t%x0, %x0, %x0";
    }

  gcc_unreachable ();
}
| 5632 | |
| 5633 | /* Returns true if INSN can be transformed from a memory load |
| 5634 | to a supported FP constant load. */ |
| 5635 | |
| 5636 | bool |
| 5637 | ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst) |
| 5638 | { |
| 5639 | rtx src = find_constant_src (insn); |
| 5640 | |
| 5641 | gcc_assert (REG_P (dst)); |
| 5642 | |
| 5643 | if (src == NULL |
| 5644 | || (SSE_REGNO_P (REGNO (dst)) |
| 5645 | && standard_sse_constant_p (x: src, GET_MODE (dst)) != 1) |
| 5646 | || (!TARGET_AVX512VL |
| 5647 | && EXT_REX_SSE_REGNO_P (REGNO (dst)) |
| 5648 | && standard_sse_constant_p (x: src, GET_MODE (dst)) == 1) |
| 5649 | || (STACK_REGNO_P (REGNO (dst)) |
| 5650 | && standard_80387_constant_p (x: src) < 1)) |
| 5651 | return false; |
| 5652 | |
| 5653 | return true; |
| 5654 | } |
| 5655 | |
| 5656 | /* Predicate for pre-reload splitters with associated instructions, |
| 5657 | which can match any time before the split1 pass (usually combine), |
| 5658 | then are unconditionally split in that pass and should not be |
| 5659 | matched again afterwards. */ |
| 5660 | |
| 5661 | bool |
| 5662 | ix86_pre_reload_split (void) |
| 5663 | { |
| 5664 | return (can_create_pseudo_p () |
| 5665 | && !(cfun->curr_properties & PROP_rtl_split_insns)); |
| 5666 | } |
| 5667 | |
| 5668 | /* Return the opcode of the TYPE_SSEMOV instruction. To move from |
| 5669 | or to xmm16-xmm31/ymm16-ymm31 registers, we either require |
| 5670 | TARGET_AVX512VL or it is a register to register move which can |
| 5671 | be done with zmm register move. */ |
| 5672 | |
| 5673 | static const char * |
| 5674 | ix86_get_ssemov (rtx *operands, unsigned size, |
| 5675 | enum attr_mode insn_mode, machine_mode mode) |
| 5676 | { |
| 5677 | char buf[128]; |
| 5678 | bool misaligned_p = (misaligned_operand (operands[0], mode) |
| 5679 | || misaligned_operand (operands[1], mode)); |
| 5680 | bool evex_reg_p = (size == 64 |
| 5681 | || EXT_REX_SSE_REG_P (operands[0]) |
| 5682 | || EXT_REX_SSE_REG_P (operands[1])); |
| 5683 | |
| 5684 | bool egpr_p = (TARGET_APX_EGPR |
| 5685 | && (x86_extended_rex2reg_mentioned_p (operands[0]) |
| 5686 | || x86_extended_rex2reg_mentioned_p (operands[1]))); |
| 5687 | bool egpr_vl = egpr_p && TARGET_AVX512VL; |
| 5688 | |
| 5689 | machine_mode scalar_mode; |
| 5690 | |
| 5691 | const char *opcode = NULL; |
| 5692 | enum |
| 5693 | { |
| 5694 | opcode_int, |
| 5695 | opcode_float, |
| 5696 | opcode_double |
| 5697 | } type = opcode_int; |
| 5698 | |
| 5699 | switch (insn_mode) |
| 5700 | { |
| 5701 | case MODE_V16SF: |
| 5702 | case MODE_V8SF: |
| 5703 | case MODE_V4SF: |
| 5704 | scalar_mode = E_SFmode; |
| 5705 | type = opcode_float; |
| 5706 | break; |
| 5707 | case MODE_V8DF: |
| 5708 | case MODE_V4DF: |
| 5709 | case MODE_V2DF: |
| 5710 | scalar_mode = E_DFmode; |
| 5711 | type = opcode_double; |
| 5712 | break; |
| 5713 | case MODE_XI: |
| 5714 | case MODE_OI: |
| 5715 | case MODE_TI: |
| 5716 | scalar_mode = GET_MODE_INNER (mode); |
| 5717 | break; |
| 5718 | default: |
| 5719 | gcc_unreachable (); |
| 5720 | } |
| 5721 | |
| 5722 | /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL, |
| 5723 | we can only use zmm register move without memory operand. */ |
| 5724 | if (evex_reg_p |
| 5725 | && !TARGET_AVX512VL |
| 5726 | && GET_MODE_SIZE (mode) < 64) |
| 5727 | { |
| 5728 | /* NB: Even though ix86_hard_regno_mode_ok doesn't allow |
| 5729 | xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when |
| 5730 | AVX512VL is disabled, LRA can still generate reg to |
| 5731 | reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit |
| 5732 | modes. */ |
| 5733 | if (memory_operand (operands[0], mode) |
| 5734 | || memory_operand (operands[1], mode)) |
| 5735 | gcc_unreachable (); |
| 5736 | size = 64; |
| 5737 | switch (type) |
| 5738 | { |
| 5739 | case opcode_int: |
| 5740 | if (scalar_mode == E_HFmode || scalar_mode == E_BFmode) |
| 5741 | opcode = (misaligned_p |
| 5742 | ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64" ) |
| 5743 | : "vmovdqa64" ); |
| 5744 | else |
| 5745 | opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32" ; |
| 5746 | break; |
| 5747 | case opcode_float: |
| 5748 | opcode = misaligned_p ? "vmovups" : "vmovaps" ; |
| 5749 | break; |
| 5750 | case opcode_double: |
| 5751 | opcode = misaligned_p ? "vmovupd" : "vmovapd" ; |
| 5752 | break; |
| 5753 | } |
| 5754 | } |
| 5755 | else if (SCALAR_FLOAT_MODE_P (scalar_mode)) |
| 5756 | { |
| 5757 | switch (scalar_mode) |
| 5758 | { |
| 5759 | case E_HFmode: |
| 5760 | case E_BFmode: |
| 5761 | if (evex_reg_p || egpr_vl) |
| 5762 | opcode = (misaligned_p |
| 5763 | ? (TARGET_AVX512BW |
| 5764 | ? "vmovdqu16" |
| 5765 | : "vmovdqu64" ) |
| 5766 | : "vmovdqa64" ); |
| 5767 | else if (egpr_p) |
| 5768 | opcode = (misaligned_p |
| 5769 | ? (TARGET_AVX512BW |
| 5770 | ? "vmovdqu16" |
| 5771 | : "%vmovups" ) |
| 5772 | : "%vmovaps" ); |
| 5773 | else |
| 5774 | opcode = (misaligned_p |
| 5775 | ? (TARGET_AVX512BW && evex_reg_p |
| 5776 | ? "vmovdqu16" |
| 5777 | : "%vmovdqu" ) |
| 5778 | : "%vmovdqa" ); |
| 5779 | break; |
| 5780 | case E_SFmode: |
| 5781 | opcode = misaligned_p ? "%vmovups" : "%vmovaps" ; |
| 5782 | break; |
| 5783 | case E_DFmode: |
| 5784 | opcode = misaligned_p ? "%vmovupd" : "%vmovapd" ; |
| 5785 | break; |
| 5786 | case E_TFmode: |
| 5787 | if (evex_reg_p || egpr_vl) |
| 5788 | opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ; |
| 5789 | else if (egpr_p) |
| 5790 | opcode = misaligned_p ? "%vmovups" : "%vmovaps" ; |
| 5791 | else |
| 5792 | opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ; |
| 5793 | break; |
| 5794 | default: |
| 5795 | gcc_unreachable (); |
| 5796 | } |
| 5797 | } |
| 5798 | else if (SCALAR_INT_MODE_P (scalar_mode)) |
| 5799 | { |
| 5800 | switch (scalar_mode) |
| 5801 | { |
| 5802 | case E_QImode: |
| 5803 | if (evex_reg_p || egpr_vl) |
| 5804 | opcode = (misaligned_p |
| 5805 | ? (TARGET_AVX512BW |
| 5806 | ? "vmovdqu8" |
| 5807 | : "vmovdqu64" ) |
| 5808 | : "vmovdqa64" ); |
| 5809 | else if (egpr_p) |
| 5810 | opcode = (misaligned_p |
| 5811 | ? (TARGET_AVX512BW |
| 5812 | ? "vmovdqu8" |
| 5813 | : "%vmovups" ) |
| 5814 | : "%vmovaps" ); |
| 5815 | else |
| 5816 | opcode = (misaligned_p |
| 5817 | ? (TARGET_AVX512BW && evex_reg_p |
| 5818 | ? "vmovdqu8" |
| 5819 | : "%vmovdqu" ) |
| 5820 | : "%vmovdqa" ); |
| 5821 | break; |
| 5822 | case E_HImode: |
| 5823 | if (evex_reg_p || egpr_vl) |
| 5824 | opcode = (misaligned_p |
| 5825 | ? (TARGET_AVX512BW |
| 5826 | ? "vmovdqu16" |
| 5827 | : "vmovdqu64" ) |
| 5828 | : "vmovdqa64" ); |
| 5829 | else if (egpr_p) |
| 5830 | opcode = (misaligned_p |
| 5831 | ? (TARGET_AVX512BW |
| 5832 | ? "vmovdqu16" |
| 5833 | : "%vmovups" ) |
| 5834 | : "%vmovaps" ); |
| 5835 | else |
| 5836 | opcode = (misaligned_p |
| 5837 | ? (TARGET_AVX512BW && evex_reg_p |
| 5838 | ? "vmovdqu16" |
| 5839 | : "%vmovdqu" ) |
| 5840 | : "%vmovdqa" ); |
| 5841 | break; |
| 5842 | case E_SImode: |
| 5843 | if (evex_reg_p || egpr_vl) |
| 5844 | opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32" ; |
| 5845 | else if (egpr_p) |
| 5846 | opcode = misaligned_p ? "%vmovups" : "%vmovaps" ; |
| 5847 | else |
| 5848 | opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ; |
| 5849 | break; |
| 5850 | case E_DImode: |
| 5851 | case E_TImode: |
| 5852 | case E_OImode: |
| 5853 | if (evex_reg_p || egpr_vl) |
| 5854 | opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ; |
| 5855 | else if (egpr_p) |
| 5856 | opcode = misaligned_p ? "%vmovups" : "%vmovaps" ; |
| 5857 | else |
| 5858 | opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ; |
| 5859 | break; |
| 5860 | case E_XImode: |
| 5861 | opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ; |
| 5862 | break; |
| 5863 | default: |
| 5864 | gcc_unreachable (); |
| 5865 | } |
| 5866 | } |
| 5867 | else |
| 5868 | gcc_unreachable (); |
| 5869 | |
| 5870 | switch (size) |
| 5871 | { |
| 5872 | case 64: |
| 5873 | snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%g1, %%g0|%%g0, %%g1}" , |
| 5874 | opcode); |
| 5875 | break; |
| 5876 | case 32: |
| 5877 | snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%t1, %%t0|%%t0, %%t1}" , |
| 5878 | opcode); |
| 5879 | break; |
| 5880 | case 16: |
| 5881 | snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%x1, %%x0|%%x0, %%x1}" , |
| 5882 | opcode); |
| 5883 | break; |
| 5884 | default: |
| 5885 | gcc_unreachable (); |
| 5886 | } |
| 5887 | output_asm_insn (buf, operands); |
| 5888 | return "" ; |
| 5889 | } |
| 5890 | |
| 5891 | /* Return the template of the TYPE_SSEMOV instruction to move |
| 5892 | operands[1] into operands[0]. */ |
| 5893 | |
| 5894 | const char * |
| 5895 | ix86_output_ssemov (rtx_insn *insn, rtx *operands) |
| 5896 | { |
| 5897 | machine_mode mode = GET_MODE (operands[0]); |
| 5898 | if (get_attr_type (insn) != TYPE_SSEMOV |
| 5899 | || mode != GET_MODE (operands[1])) |
| 5900 | gcc_unreachable (); |
| 5901 | |
| 5902 | enum attr_mode insn_mode = get_attr_mode (insn); |
| 5903 | |
| 5904 | switch (insn_mode) |
| 5905 | { |
| 5906 | case MODE_XI: |
| 5907 | case MODE_V8DF: |
| 5908 | case MODE_V16SF: |
| 5909 | return ix86_get_ssemov (operands, size: 64, insn_mode, mode); |
| 5910 | |
| 5911 | case MODE_OI: |
| 5912 | case MODE_V4DF: |
| 5913 | case MODE_V8SF: |
| 5914 | return ix86_get_ssemov (operands, size: 32, insn_mode, mode); |
| 5915 | |
| 5916 | case MODE_TI: |
| 5917 | case MODE_V2DF: |
| 5918 | case MODE_V4SF: |
| 5919 | return ix86_get_ssemov (operands, size: 16, insn_mode, mode); |
| 5920 | |
| 5921 | case MODE_DI: |
| 5922 | /* Handle broken assemblers that require movd instead of movq. */ |
| 5923 | if (GENERAL_REG_P (operands[0])) |
| 5924 | { |
| 5925 | if (HAVE_AS_IX86_INTERUNIT_MOVQ) |
| 5926 | return "%vmovq\t{%1, %q0|%q0, %1}" ; |
| 5927 | else |
| 5928 | return "%vmovd\t{%1, %q0|%q0, %1}" ; |
| 5929 | } |
| 5930 | else if (GENERAL_REG_P (operands[1])) |
| 5931 | { |
| 5932 | if (HAVE_AS_IX86_INTERUNIT_MOVQ) |
| 5933 | return "%vmovq\t{%q1, %0|%0, %q1}" ; |
| 5934 | else |
| 5935 | return "%vmovd\t{%q1, %0|%0, %q1}" ; |
| 5936 | } |
| 5937 | else |
| 5938 | return "%vmovq\t{%1, %0|%0, %1}" ; |
| 5939 | |
| 5940 | case MODE_SI: |
| 5941 | if (GENERAL_REG_P (operands[0])) |
| 5942 | return "%vmovd\t{%1, %k0|%k0, %1}" ; |
| 5943 | else if (GENERAL_REG_P (operands[1])) |
| 5944 | return "%vmovd\t{%k1, %0|%0, %k1}" ; |
| 5945 | else |
| 5946 | return "%vmovd\t{%1, %0|%0, %1}" ; |
| 5947 | |
| 5948 | case MODE_HI: |
| 5949 | if (GENERAL_REG_P (operands[0])) |
| 5950 | return "vmovw\t{%1, %k0|%k0, %1}" ; |
| 5951 | else if (GENERAL_REG_P (operands[1])) |
| 5952 | return "vmovw\t{%k1, %0|%0, %k1}" ; |
| 5953 | else |
| 5954 | return "vmovw\t{%1, %0|%0, %1}" ; |
| 5955 | |
| 5956 | case MODE_DF: |
| 5957 | if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) |
| 5958 | return "vmovsd\t{%d1, %0|%0, %d1}" ; |
| 5959 | else |
| 5960 | return "%vmovsd\t{%1, %0|%0, %1}" ; |
| 5961 | |
| 5962 | case MODE_SF: |
| 5963 | if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) |
| 5964 | return "vmovss\t{%d1, %0|%0, %d1}" ; |
| 5965 | else |
| 5966 | return "%vmovss\t{%1, %0|%0, %1}" ; |
| 5967 | |
| 5968 | case MODE_HF: |
| 5969 | case MODE_BF: |
| 5970 | if (REG_P (operands[0]) && REG_P (operands[1])) |
| 5971 | return "vmovsh\t{%d1, %0|%0, %d1}" ; |
| 5972 | else |
| 5973 | return "vmovsh\t{%1, %0|%0, %1}" ; |
| 5974 | |
| 5975 | case MODE_V1DF: |
| 5976 | gcc_assert (!TARGET_AVX); |
| 5977 | return "movlpd\t{%1, %0|%0, %1}" ; |
| 5978 | |
| 5979 | case MODE_V2SF: |
| 5980 | if (TARGET_AVX && REG_P (operands[0])) |
| 5981 | return "vmovlps\t{%1, %d0|%d0, %1}" ; |
| 5982 | else |
| 5983 | return "%vmovlps\t{%1, %0|%0, %1}" ; |
| 5984 | |
| 5985 | default: |
| 5986 | gcc_unreachable (); |
| 5987 | } |
| 5988 | } |
| 5989 | |
| 5990 | /* Returns true if OP contains a symbol reference */ |
| 5991 | |
| 5992 | bool |
| 5993 | symbolic_reference_mentioned_p (rtx op) |
| 5994 | { |
| 5995 | const char *fmt; |
| 5996 | int i; |
| 5997 | |
| 5998 | if (SYMBOL_REF_P (op) || LABEL_REF_P (op)) |
| 5999 | return true; |
| 6000 | |
| 6001 | fmt = GET_RTX_FORMAT (GET_CODE (op)); |
| 6002 | for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) |
| 6003 | { |
| 6004 | if (fmt[i] == 'E') |
| 6005 | { |
| 6006 | int j; |
| 6007 | |
| 6008 | for (j = XVECLEN (op, i) - 1; j >= 0; j--) |
| 6009 | if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) |
| 6010 | return true; |
| 6011 | } |
| 6012 | |
| 6013 | else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) |
| 6014 | return true; |
| 6015 | } |
| 6016 | |
| 6017 | return false; |
| 6018 | } |
| 6019 | |
| 6020 | /* Return true if it is appropriate to emit `ret' instructions in the |
| 6021 | body of a function. Do this only if the epilogue is simple, needing a |
| 6022 | couple of insns. Prior to reloading, we can't tell how many registers |
| 6023 | must be saved, so return false then. Return false if there is no frame |
| 6024 | marker to de-allocate. */ |
| 6025 | |
| 6026 | bool |
| 6027 | ix86_can_use_return_insn_p (void) |
| 6028 | { |
| 6029 | if (ix86_function_ms_hook_prologue (fn: current_function_decl)) |
| 6030 | return false; |
| 6031 | |
| 6032 | if (ix86_function_naked (fn: current_function_decl)) |
| 6033 | return false; |
| 6034 | |
| 6035 | /* Don't use `ret' instruction in interrupt handler. */ |
| 6036 | if (! reload_completed |
| 6037 | || frame_pointer_needed |
| 6038 | || cfun->machine->func_type != TYPE_NORMAL) |
| 6039 | return 0; |
| 6040 | |
| 6041 | /* Don't allow more than 32k pop, since that's all we can do |
| 6042 | with one instruction. */ |
| 6043 | if (crtl->args.pops_args && crtl->args.size >= 32768) |
| 6044 | return 0; |
| 6045 | |
| 6046 | struct ix86_frame &frame = cfun->machine->frame; |
| 6047 | return (frame.stack_pointer_offset == UNITS_PER_WORD |
| 6048 | && (frame.nregs + frame.nsseregs) == 0); |
| 6049 | } |
| 6050 | |
| 6051 | /* Return stack frame size. get_frame_size () returns used stack slots |
| 6052 | during compilation, which may be optimized out later. If stack frame |
| 6053 | is needed, stack_frame_required should be true. */ |
| 6054 | |
| 6055 | static HOST_WIDE_INT |
| 6056 | ix86_get_frame_size (void) |
| 6057 | { |
| 6058 | if (cfun->machine->stack_frame_required) |
| 6059 | return get_frame_size (); |
| 6060 | else |
| 6061 | return 0; |
| 6062 | } |
| 6063 | |
| 6064 | /* Value should be nonzero if functions must have frame pointers. |
| 6065 | Zero means the frame pointer need not be set up (and parms may |
| 6066 | be accessed via the stack pointer) in functions that seem suitable. */ |
| 6067 | |
| 6068 | static bool |
| 6069 | ix86_frame_pointer_required (void) |
| 6070 | { |
| 6071 | /* If we accessed previous frames, then the generated code expects |
| 6072 | to be able to access the saved ebp value in our frame. */ |
| 6073 | if (cfun->machine->accesses_prev_frame) |
| 6074 | return true; |
| 6075 | |
| 6076 | /* Several x86 os'es need a frame pointer for other reasons, |
| 6077 | usually pertaining to setjmp. */ |
| 6078 | if (SUBTARGET_FRAME_POINTER_REQUIRED) |
| 6079 | return true; |
| 6080 | |
| 6081 | /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ |
| 6082 | if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) |
| 6083 | return true; |
| 6084 | |
| 6085 | /* Win64 SEH, very large frames need a frame-pointer as maximum stack |
| 6086 | allocation is 4GB. */ |
| 6087 | if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE) |
| 6088 | return true; |
| 6089 | |
| 6090 | /* SSE saves require frame-pointer when stack is misaligned. */ |
| 6091 | if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) |
| 6092 | return true; |
| 6093 | |
| 6094 | /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER |
| 6095 | turns off the frame pointer by default. Turn it back on now if |
| 6096 | we've not got a leaf function. */ |
| 6097 | if (TARGET_OMIT_LEAF_FRAME_POINTER |
| 6098 | && (!crtl->is_leaf |
| 6099 | || ix86_current_function_calls_tls_descriptor)) |
| 6100 | return true; |
| 6101 | |
| 6102 | /* Several versions of mcount for the x86 assumes that there is a |
| 6103 | frame, so we cannot allow profiling without a frame pointer. */ |
| 6104 | if (crtl->profile && !flag_fentry) |
| 6105 | return true; |
| 6106 | |
| 6107 | return false; |
| 6108 | } |
| 6109 | |
| 6110 | /* Record that the current function accesses previous call frames. */ |
| 6111 | |
| 6112 | void |
| 6113 | ix86_setup_frame_addresses (void) |
| 6114 | { |
| 6115 | cfun->machine->accesses_prev_frame = 1; |
| 6116 | } |
| 6117 | |
/* Use hidden comdat thunks when the assembler and target support
   them; otherwise emit local labels per translation unit.  */
#if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Label count for call and return thunks.  It is used to make unique
   labels in call and return thunks.  */
static int indirectlabelno;

/* True if call thunk function is needed.  */
static bool indirect_thunk_needed = false;

/* Bit masks of integer registers, which contain branch target, used
   by call thunk functions.  */
static HARD_REG_SET indirect_thunks_used;

/* True if return thunk function is needed.  */
static bool indirect_return_needed = false;

/* True if return thunk function via CX is needed.  */
static bool indirect_return_via_cx;

#ifndef INDIRECT_LABEL
# define INDIRECT_LABEL "LIND"
#endif

/* Indicate what prefix is needed for an indirect branch.  */
enum indirect_thunk_prefix
{
  indirect_thunk_prefix_none,
  indirect_thunk_prefix_nt
};
| 6151 | |
| 6152 | /* Return the prefix needed for an indirect branch INSN. */ |
| 6153 | |
| 6154 | enum indirect_thunk_prefix |
| 6155 | indirect_thunk_need_prefix (rtx_insn *insn) |
| 6156 | { |
| 6157 | enum indirect_thunk_prefix need_prefix; |
| 6158 | if ((cfun->machine->indirect_branch_type |
| 6159 | == indirect_branch_thunk_extern) |
| 6160 | && ix86_notrack_prefixed_insn_p (insn)) |
| 6161 | { |
| 6162 | /* NOTRACK prefix is only used with external thunk so that it |
| 6163 | can be properly updated to support CET at run-time. */ |
| 6164 | need_prefix = indirect_thunk_prefix_nt; |
| 6165 | } |
| 6166 | else |
| 6167 | need_prefix = indirect_thunk_prefix_none; |
| 6168 | return need_prefix; |
| 6169 | } |
| 6170 | |
| 6171 | /* Fills in the label name that should be used for the indirect thunk. */ |
| 6172 | |
| 6173 | static void |
| 6174 | indirect_thunk_name (char name[32], unsigned int regno, |
| 6175 | enum indirect_thunk_prefix need_prefix, |
| 6176 | bool ret_p) |
| 6177 | { |
| 6178 | if (regno != INVALID_REGNUM && regno != CX_REG && ret_p) |
| 6179 | gcc_unreachable (); |
| 6180 | |
| 6181 | if (USE_HIDDEN_LINKONCE) |
| 6182 | { |
| 6183 | const char *prefix; |
| 6184 | |
| 6185 | if (need_prefix == indirect_thunk_prefix_nt |
| 6186 | && regno != INVALID_REGNUM) |
| 6187 | { |
| 6188 | /* NOTRACK prefix is only used with external thunk via |
| 6189 | register so that NOTRACK prefix can be added to indirect |
| 6190 | branch via register to support CET at run-time. */ |
| 6191 | prefix = "_nt" ; |
| 6192 | } |
| 6193 | else |
| 6194 | prefix = "" ; |
| 6195 | |
| 6196 | const char *ret = ret_p ? "return" : "indirect" ; |
| 6197 | |
| 6198 | if (regno != INVALID_REGNUM) |
| 6199 | { |
| 6200 | const char *reg_prefix; |
| 6201 | if (LEGACY_INT_REGNO_P (regno)) |
| 6202 | reg_prefix = TARGET_64BIT ? "r" : "e" ; |
| 6203 | else |
| 6204 | reg_prefix = "" ; |
| 6205 | sprintf (s: name, format: "__x86_%s_thunk%s_%s%s" , |
| 6206 | ret, prefix, reg_prefix, reg_names[regno]); |
| 6207 | } |
| 6208 | else |
| 6209 | sprintf (s: name, format: "__x86_%s_thunk%s" , ret, prefix); |
| 6210 | } |
| 6211 | else |
| 6212 | { |
| 6213 | if (regno != INVALID_REGNUM) |
| 6214 | ASM_GENERATE_INTERNAL_LABEL (name, "LITR" , regno); |
| 6215 | else |
| 6216 | { |
| 6217 | if (ret_p) |
| 6218 | ASM_GENERATE_INTERNAL_LABEL (name, "LRT" , 0); |
| 6219 | else |
| 6220 | ASM_GENERATE_INTERNAL_LABEL (name, "LIT" , 0); |
| 6221 | } |
| 6222 | } |
| 6223 | } |
| 6224 | |
/* Output a call and return thunk for indirect branch.  If REGNO != -1,
   the function address is in REGNO and the call and return thunk looks like:

	call	L2
   L1:
	pause
	lfence
	jmp	L1
   L2:
	mov	%REG, (%sp)
	ret

   Otherwise, the function address is on the top of stack and the
   call and return thunk looks like:

	call L2
   L1:
	pause
	lfence
	jmp L1
   L2:
	lea WORD_SIZE(%sp), %sp
	ret

   The call at the top captures the branch target's address on the
   stack; the L1 loop traps any speculative execution of the RET.  */

static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  /* Generate the two local labels (L1 spin loop, L2 landing pad).  */
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
			       indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
			       indirectlabelno++);

  /* Call */
  fputs (s: "\tcall\t" , stream: asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc (c: '\n', stream: asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs prefer each a different instruction as loop filler.
     Usage of both pause + lfence is compromise solution.  */
  fprintf (stream: asm_out_file, format: "\tpause\n\tlfence\n" );

  /* Jump.  */
  fputs (s: "\tjmp\t" , stream: asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc (c: '\n', stream: asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
	{
	  /* Without .cfi_* directives, advance the FDE location to L2
	     by hand before recording the new CFA offset.  */
	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, obj: xcfi);
	}
      /* The CFA is now one word further from the entry SP.  */
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, obj: xcfi);
      dwarf2out_emit_cfi (cfi: xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV: overwrite the pushed return address with the real target
	 held in REGNO, so RET branches to it.  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}" , xops);
    }
  else
    {
      /* LEA: drop the pushed return address; the real target is the
	 word underneath it.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}" , xops);
    }

  fputs (s: "\tret\n" , stream: asm_out_file);
  /* Trap straight-line speculation past the RET when requested.  */
  if ((ix86_harden_sls & harden_sls_return))
    fputs (s: "\tint3\n" , stream: asm_out_file);
}
| 6317 | |
/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  Thunk is
   used for function return if RET_P is true.  */

static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
				unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      /* Mach-O has no COMDAT groups; use weak + private_extern.  */
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t" , asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t" , asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
	/* Hidden COMDAT: one copy per link, invisible outside it.  */
	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	targetm.asm_out.unique_section (decl, 0);
	switch_to_section (get_named_section (decl, NULL, 0));

	targetm.asm_out.globalize_label (asm_out_file, name);
	fputs (s: "\t.hidden\t" , stream: asm_out_file);
	assemble_name (asm_out_file, name);
	putc (c: '\n', stream: asm_out_file);
	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
	/* Fallback: a local copy in the text section.  */
	switch_to_section (text_section);
	ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  /* Tear down the temporary function context again.  */
  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
| 6393 | |
| 6394 | static int pic_labels_used; |
| 6395 | |
| 6396 | /* Fills in the label name that should be used for a pc thunk for |
| 6397 | the given register. */ |
| 6398 | |
| 6399 | static void |
| 6400 | get_pc_thunk_name (char name[32], unsigned int regno) |
| 6401 | { |
| 6402 | gcc_assert (!TARGET_64BIT); |
| 6403 | |
| 6404 | if (USE_HIDDEN_LINKONCE) |
| 6405 | sprintf (s: name, format: "__x86.get_pc_thunk.%s" , reg_names[regno]); |
| 6406 | else |
| 6407 | ASM_GENERATE_INTERNAL_LABEL (name, "LPR" , regno); |
| 6408 | } |
| 6409 | |
| 6410 | |
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Run at the
   end of compilation, it also emits any indirect-branch thunks
   that were requested while compiling the file.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  /* First emit the return/indirect thunks recorded as needed.  */
  if (indirect_return_needed)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    INVALID_REGNUM, ret_p: true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    CX_REG, ret_p: true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    INVALID_REGNUM, ret_p: false);

  /* Per-register thunks for the REX-addressable registers.  */
  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);
    }

  /* Likewise for the APX REX2 (r16-r31) registers.  */
  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);
    }

  /* Legacy integer registers: indirect thunks plus the 32-bit PIC
     get_pc thunks.  */
  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);

      /* Skip registers for which no pc thunk was requested.  */
      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  /* Mach-O has no COMDAT groups; use weak + private_extern.  */
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t" , asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t" , asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  /* Hidden COMDAT: one copy per link, invisible outside it.  */
	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs (s: "\t.hidden\t" , stream: asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc (c: '\n', stream: asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  /* Fallback: a local copy in the text section.  */
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs (s: "\tnop\n" , stream: asm_out_file);
	}

      /* Thunk body: load the return address from the stack and return;
	 the caller's address ends up in REGNO.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}" , xops);
      fputs (s: "\tret\n" , stream: asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
| 6535 | |
/* Emit code for the SET_GOT patterns.  Loads the address of the GOT
   into DEST.  LABEL, when non-NULL, is the CODE_LABEL marking the
   point whose address is taken (non-PIC and Mach-O paths).  Returns
   the (empty) insn template string.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}" , xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}" , xops);
      return "" ;
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      /* PIC: call the per-register pc thunk, which leaves the return
	 address (i.e. the current PC) in DEST.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2" , xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L" ,
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      /* Non-PIC: just load the label's address directly.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}" , xops);
      targetm.asm_out.internal_label (asm_out_file, "L" ,
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  /* Add the GOT offset to turn the PC into the GOT address.  */
  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}" , xops);

  return "" ;
}
| 6605 | |
| 6606 | /* Generate an "push" pattern for input ARG. */ |
| 6607 | |
| 6608 | rtx |
| 6609 | gen_push (rtx arg, bool ppx_p) |
| 6610 | { |
| 6611 | struct machine_function *m = cfun->machine; |
| 6612 | |
| 6613 | if (m->fs.cfa_reg == stack_pointer_rtx) |
| 6614 | m->fs.cfa_offset += UNITS_PER_WORD; |
| 6615 | m->fs.sp_offset += UNITS_PER_WORD; |
| 6616 | |
| 6617 | if (REG_P (arg) && GET_MODE (arg) != word_mode) |
| 6618 | arg = gen_rtx_REG (word_mode, REGNO (arg)); |
| 6619 | |
| 6620 | rtx stack = gen_rtx_MEM (word_mode, |
| 6621 | gen_rtx_PRE_DEC (Pmode, |
| 6622 | stack_pointer_rtx)); |
| 6623 | return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg); |
| 6624 | } |
| 6625 | |
| 6626 | rtx |
| 6627 | gen_pushfl (void) |
| 6628 | { |
| 6629 | struct machine_function *m = cfun->machine; |
| 6630 | rtx flags, mem; |
| 6631 | |
| 6632 | if (m->fs.cfa_reg == stack_pointer_rtx) |
| 6633 | m->fs.cfa_offset += UNITS_PER_WORD; |
| 6634 | m->fs.sp_offset += UNITS_PER_WORD; |
| 6635 | |
| 6636 | flags = gen_rtx_REG (CCmode, FLAGS_REG); |
| 6637 | |
| 6638 | mem = gen_rtx_MEM (word_mode, |
| 6639 | gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); |
| 6640 | |
| 6641 | return gen_pushfl2 (arg0: word_mode, x0: mem, x1: flags); |
| 6642 | } |
| 6643 | |
| 6644 | /* Generate an "pop" pattern for input ARG. */ |
| 6645 | |
| 6646 | rtx |
| 6647 | gen_pop (rtx arg, bool ppx_p) |
| 6648 | { |
| 6649 | if (REG_P (arg) && GET_MODE (arg) != word_mode) |
| 6650 | arg = gen_rtx_REG (word_mode, REGNO (arg)); |
| 6651 | |
| 6652 | rtx stack = gen_rtx_MEM (word_mode, |
| 6653 | gen_rtx_POST_INC (Pmode, |
| 6654 | stack_pointer_rtx)); |
| 6655 | |
| 6656 | return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack); |
| 6657 | } |
| 6658 | |
| 6659 | rtx |
| 6660 | gen_popfl (void) |
| 6661 | { |
| 6662 | rtx flags, mem; |
| 6663 | |
| 6664 | flags = gen_rtx_REG (CCmode, FLAGS_REG); |
| 6665 | |
| 6666 | mem = gen_rtx_MEM (word_mode, |
| 6667 | gen_rtx_POST_INC (Pmode, stack_pointer_rtx)); |
| 6668 | |
| 6669 | return gen_popfl1 (arg0: word_mode, x0: flags, x1: mem); |
| 6670 | } |
| 6671 | |
| 6672 | /* Generate a "push2" pattern for input ARG. */ |
| 6673 | rtx |
| 6674 | gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false) |
| 6675 | { |
| 6676 | struct machine_function *m = cfun->machine; |
| 6677 | const int offset = UNITS_PER_WORD * 2; |
| 6678 | |
| 6679 | if (m->fs.cfa_reg == stack_pointer_rtx) |
| 6680 | m->fs.cfa_offset += offset; |
| 6681 | m->fs.sp_offset += offset; |
| 6682 | |
| 6683 | if (REG_P (reg1) && GET_MODE (reg1) != word_mode) |
| 6684 | reg1 = gen_rtx_REG (word_mode, REGNO (reg1)); |
| 6685 | |
| 6686 | if (REG_P (reg2) && GET_MODE (reg2) != word_mode) |
| 6687 | reg2 = gen_rtx_REG (word_mode, REGNO (reg2)); |
| 6688 | |
| 6689 | return ppx_p ? gen_push2p_di (mem, reg1, reg2) |
| 6690 | : gen_push2_di (mem, reg1, reg2); |
| 6691 | } |
| 6692 | |
| 6693 | /* Return >= 0 if there is an unused call-clobbered register available |
| 6694 | for the entire function. */ |
| 6695 | |
| 6696 | static unsigned int |
| 6697 | ix86_select_alt_pic_regnum (void) |
| 6698 | { |
| 6699 | if (ix86_use_pseudo_pic_reg ()) |
| 6700 | return INVALID_REGNUM; |
| 6701 | |
| 6702 | if (crtl->is_leaf |
| 6703 | && !crtl->profile |
| 6704 | && !ix86_current_function_calls_tls_descriptor) |
| 6705 | { |
| 6706 | int i, drap; |
| 6707 | /* Can't use the same register for both PIC and DRAP. */ |
| 6708 | if (crtl->drap_reg) |
| 6709 | drap = REGNO (crtl->drap_reg); |
| 6710 | else |
| 6711 | drap = -1; |
| 6712 | for (i = 2; i >= 0; --i) |
| 6713 | if (i != drap && !df_regs_ever_live_p (i)) |
| 6714 | return i; |
| 6715 | } |
| 6716 | |
| 6717 | return INVALID_REGNUM; |
| 6718 | } |
| 6719 | |
| 6720 | /* Return true if REGNO is used by the epilogue. */ |
| 6721 | |
| 6722 | bool |
| 6723 | ix86_epilogue_uses (int regno) |
| 6724 | { |
| 6725 | /* If there are no caller-saved registers, we preserve all registers, |
| 6726 | except for MMX and x87 registers which aren't supported when saving |
| 6727 | and restoring registers. Don't explicitly save SP register since |
| 6728 | it is always preserved. */ |
| 6729 | return (epilogue_completed |
| 6730 | && (cfun->machine->call_saved_registers |
| 6731 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
| 6732 | && !fixed_regs[regno] |
| 6733 | && !STACK_REGNO_P (regno) |
| 6734 | && !MMX_REGNO_P (regno)); |
| 6735 | } |
| 6736 | |
| 6737 | /* Return nonzero if register REGNO can be used as a scratch register |
| 6738 | in peephole2. */ |
| 6739 | |
| 6740 | static bool |
| 6741 | ix86_hard_regno_scratch_ok (unsigned int regno) |
| 6742 | { |
| 6743 | /* If there are no caller-saved registers, we can't use any register |
| 6744 | as a scratch register after epilogue and use REGNO as scratch |
| 6745 | register only if it has been used before to avoid saving and |
| 6746 | restoring it. */ |
| 6747 | return ((cfun->machine->call_saved_registers |
| 6748 | != TYPE_NO_CALLER_SAVED_REGISTERS) |
| 6749 | || (!epilogue_completed |
| 6750 | && df_regs_ever_live_p (regno))); |
| 6751 | } |
| 6752 | |
/* Return TRUE if we need to save REGNO.  MAYBE_EH_RETURN also counts
   the EH return data registers; IGNORE_OUTLINED excludes registers
   handled by the ms2sysv out-of-line save/restore stubs.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  rtx reg;

  switch (cfun->machine->call_saved_registers)
    {
    case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
      break;

    case TYPE_NO_CALLER_SAVED_REGISTERS:
      /* If there are no caller-saved registers, we preserve all
	 registers, except for MMX and x87 registers which aren't
	 supported when saving and restoring registers.  Don't
	 explicitly save SP register since it is always preserved.

	 Don't preserve registers used for function return value.  */
      reg = crtl->return_rtx;
      if (reg)
	{
	  /* Walk each hard register in the return value.  */
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));

    case TYPE_NO_CALLEE_SAVED_REGISTERS:
    case TYPE_PRESERVE_NONE:
      /* Nothing is callee-saved except a needed frame pointer.  */
      if (regno != HARD_FRAME_POINTER_REGNUM)
	return false;
      break;
    }

  /* The PIC register needs saving whenever something may still use
     the GOT after the prologue.  */
  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	  _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	/* Only save it when no alternate register can carry the PIC
	   base instead.  */
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  /* EH return data registers must survive into the epilogue.  */
  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  /* Registers managed by the ms2sysv stubs are saved out of line.  */
  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  /* The dynamic realign argument pointer must be saved explicitly.  */
  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  /* Default rule: save live callee-saved registers, except a frame
     pointer that the frame machinery already handles.  */
  return (df_regs_ever_live_p (regno)
	  && !call_used_or_fixed_reg_p (regno)
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
| 6844 | |
| 6845 | /* Return number of saved general prupose registers. */ |
| 6846 | |
| 6847 | static int |
| 6848 | ix86_nsaved_regs (void) |
| 6849 | { |
| 6850 | int nregs = 0; |
| 6851 | int regno; |
| 6852 | |
| 6853 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 6854 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
| 6855 | nregs ++; |
| 6856 | return nregs; |
| 6857 | } |
| 6858 | |
| 6859 | /* Return number of saved SSE registers. */ |
| 6860 | |
| 6861 | static int |
| 6862 | ix86_nsaved_sseregs (void) |
| 6863 | { |
| 6864 | int nregs = 0; |
| 6865 | int regno; |
| 6866 | |
| 6867 | if (!TARGET_64BIT_MS_ABI |
| 6868 | && (cfun->machine->call_saved_registers |
| 6869 | != TYPE_NO_CALLER_SAVED_REGISTERS)) |
| 6870 | return 0; |
| 6871 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 6872 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
| 6873 | nregs ++; |
| 6874 | return nregs; |
| 6875 | } |
| 6876 | |
| 6877 | /* Given FROM and TO register numbers, say whether this elimination is |
| 6878 | allowed. If stack alignment is needed, we can only replace argument |
| 6879 | pointer with hard frame pointer, or replace frame pointer with stack |
| 6880 | pointer. Otherwise, frame pointer elimination is automatically |
| 6881 | handled and all other eliminations are valid. */ |
| 6882 | |
| 6883 | static bool |
| 6884 | ix86_can_eliminate (const int from, const int to) |
| 6885 | { |
| 6886 | if (stack_realign_fp) |
| 6887 | return ((from == ARG_POINTER_REGNUM |
| 6888 | && to == HARD_FRAME_POINTER_REGNUM) |
| 6889 | || (from == FRAME_POINTER_REGNUM |
| 6890 | && to == STACK_POINTER_REGNUM)); |
| 6891 | else |
| 6892 | return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; |
| 6893 | } |
| 6894 | |
| 6895 | /* Return the offset between two registers, one to be eliminated, and the other |
| 6896 | its replacement, at the start of a routine. */ |
| 6897 | |
| 6898 | HOST_WIDE_INT |
| 6899 | ix86_initial_elimination_offset (int from, int to) |
| 6900 | { |
| 6901 | struct ix86_frame &frame = cfun->machine->frame; |
| 6902 | |
| 6903 | if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
| 6904 | return frame.hard_frame_pointer_offset; |
| 6905 | else if (from == FRAME_POINTER_REGNUM |
| 6906 | && to == HARD_FRAME_POINTER_REGNUM) |
| 6907 | return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; |
| 6908 | else |
| 6909 | { |
| 6910 | gcc_assert (to == STACK_POINTER_REGNUM); |
| 6911 | |
| 6912 | if (from == ARG_POINTER_REGNUM) |
| 6913 | return frame.stack_pointer_offset; |
| 6914 | |
| 6915 | gcc_assert (from == FRAME_POINTER_REGNUM); |
| 6916 | return frame.stack_pointer_offset - frame.frame_pointer_offset; |
| 6917 | } |
| 6918 | } |
| 6919 | |
| 6920 | /* Emits a warning for unsupported msabi to sysv pro/epilogues. */ |
| 6921 | void |
| 6922 | warn_once_call_ms2sysv_xlogues (const char *feature) |
| 6923 | { |
| 6924 | static bool warned_once = false; |
| 6925 | if (!warned_once) |
| 6926 | { |
| 6927 | warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s" , |
| 6928 | feature); |
| 6929 | warned_once = true; |
| 6930 | } |
| 6931 | } |
| 6932 | |
| 6933 | /* Return the probing interval for -fstack-clash-protection. */ |
| 6934 | |
| 6935 | static HOST_WIDE_INT |
| 6936 | get_probe_interval (void) |
| 6937 | { |
| 6938 | if (flag_stack_clash_protection) |
| 6939 | return (HOST_WIDE_INT_1U |
| 6940 | << param_stack_clash_protection_probe_interval); |
| 6941 | else |
| 6942 | return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); |
| 6943 | } |
| 6944 | |
| 6945 | /* When using -fsplit-stack, the allocation routines set a field in |
| 6946 | the TCB to the bottom of the stack plus this much space, measured |
| 6947 | in bytes. */ |
| 6948 | |
| 6949 | #define SPLIT_STACK_AVAILABLE 256 |
| 6950 | |
| 6951 | /* Return true if push2/pop2 can be generated. */ |
| 6952 | |
| 6953 | static bool |
| 6954 | ix86_can_use_push2pop2 (void) |
| 6955 | { |
| 6956 | /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */ |
| 6957 | unsigned int incoming_stack_boundary |
| 6958 | = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary |
| 6959 | ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); |
| 6960 | return incoming_stack_boundary % 128 == 0; |
| 6961 | } |
| 6962 | |
| 6963 | /* Helper function to determine whether push2/pop2 can be used in prologue or |
| 6964 | epilogue for register save/restore. */ |
| 6965 | static bool |
| 6966 | ix86_pro_and_epilogue_can_use_push2pop2 (int nregs) |
| 6967 | { |
| 6968 | if (!ix86_can_use_push2pop2 ()) |
| 6969 | return false; |
| 6970 | int aligned = cfun->machine->fs.sp_offset % 16 == 0; |
| 6971 | return TARGET_APX_PUSH2POP2 |
| 6972 | && !cfun->machine->frame.save_regs_using_mov |
| 6973 | && cfun->machine->func_type == TYPE_NORMAL |
| 6974 | && (nregs + aligned) >= 3; |
| 6975 | } |
| 6976 | |
| 6977 | /* Check if push/pop should be used to save/restore registers. */ |
| 6978 | static bool |
| 6979 | save_regs_using_push_pop (HOST_WIDE_INT to_allocate) |
| 6980 | { |
| 6981 | return ((!to_allocate && cfun->machine->frame.nregs <= 1) |
| 6982 | || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) |
| 6983 | /* If static stack checking is enabled and done with probes, |
| 6984 | the registers need to be saved before allocating the frame. */ |
| 6985 | || flag_stack_check == STATIC_BUILTIN_STACK_CHECK |
| 6986 | /* If stack clash probing needs a loop, then it needs a |
| 6987 | scratch register. But the returned register is only guaranteed |
| 6988 | to be safe to use after register saves are complete. So if |
| 6989 | stack clash protections are enabled and the allocated frame is |
| 6990 | larger than the probe interval, then use pushes to save |
| 6991 | callee saved registers. */ |
| 6992 | || (flag_stack_clash_protection |
| 6993 | && !ix86_target_stack_probe () |
| 6994 | && to_allocate > get_probe_interval ())); |
| 6995 | } |
| 6996 | |
/* Fill structure ix86_frame about frame of currently computed function:
   register save areas, realignment requirements, local frame, outgoing
   argument area and the various offsets measured from the CFA.  Per the
   comment below, repeated calls with the same frame requirements must
   produce the same layout.  */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = ix86_get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      /* The stub is disabled (with a once-per-feature warning) when the
	 function uses __builtin_eh_return ...  */
      if (crtl->calls_eh_return)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues (feature: "__builtin_eh_return" );
	}

      /* ... or keeps its static chain on the stack.  */
      else if (ix86_static_chain_on_stack)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues (feature: "static call chains" );
	}

      /* Finally, compute which registers the stub will manage.  */
      else
	{
	  unsigned count = xlogue_layout::count_stub_managed_regs ();
	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
	  m->call_ms2sysv_pad_in = 0;
	}
    }

  /* Count the integer and SSE registers that need saving.  */
  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* 64-bit MS ABI seem to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at command line or via
     force_align_arg_pointer attribute.

     Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
     at call sites, including profile function calls.

     For APX push2/pop2, the stack also requires 128b alignment.  */
  if ((ix86_pro_and_epilogue_can_use_push2pop2 (nregs: frame->nregs)
       && crtl->preferred_stack_boundary < 128)
      || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
	   && crtl->preferred_stack_boundary < 128)
	  && (!crtl->is_leaf || cfun->calls_alloca != 0
	      || ix86_current_function_calls_tls_descriptor
	      || (TARGET_MACHO && crtl->profile)
	      || ix86_incoming_stack_boundary < 128)))
    {
      crtl->preferred_stack_boundary = 128;
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
    }

  /* Convert the alignments from bits to bytes.  */
  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi or with
     no_caller_saved_registers attribute.  */
  gcc_assert (TARGET_64BIT
	      || (cfun->machine->call_saved_registers
		  == TYPE_NO_CALLER_SAVED_REGISTERS)
	      || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert ((cfun->machine->call_saved_registers
		   == TYPE_NO_CALLER_SAVED_REGISTERS)
		  || !frame->nsseregs);
    }

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    m->use_fast_prologue_epilogue = false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (decl: current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.

	 Calling this hook multiple times with the same frame requirements
	 must produce the same layout, since the RA might otherwise be
	 unable to reach a fixed point or might fail its final sanity checks.
	 This means that once we've assumed that a function does or doesn't
	 have a particular size, we have to stick to that assumption
	 regardless of how the function has changed since.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	m->use_fast_prologue_epilogue = false;
      else
	{
	  /* Cache the expensive_function_p answer per COUNT so that
	     repeated layout computations stay stable.  */
	  if (count != frame->expensive_count)
	    {
	      frame->expensive_count = count;
	      frame->expensive_p = expensive_function_p (count);
	    }
	  m->use_fast_prologue_epilogue = !frame->expensive_p;
	}
    }

  frame->save_regs_using_mov
    = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;

  /* OFFSET accumulates distances below the CFA from here on.  */
  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

  /* Also adjust stack_realign_offset for the largest alignment of
     stack slot actually used.  */
  if (stack_realign_fp
      || (cfun->machine->max_used_stack_alignment != 0
	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
    {
      /* We may need a 16-byte aligned stack for the remainder of the
	 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX/2/512.  In order
	 to avoid wasting space, we first calculate the space needed for
	 the rest of the register saves, add that to the stack pointer,
	 and then realign the stack to the boundary of the start of the
	 frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
	{
	  if (m->call_ms2sysv)
	    {
	      m->call_ms2sysv_pad_in = 0;
	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
	    }

	  else if (frame->nsseregs)
	    /* The only ABI that has saved SSE registers (Win64) also has a
	       16-byte aligned default stack.  However, many programs violate
	       the ABI, and Wine64 forces stack realignment to compensate.  */
	    space_needed = frame->nsseregs * 16;

	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
	     rounding to be pedantic.  */
	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
	}
      else
	space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
	 before this point are not directly comparable with values below
	 this point.  Use sp_valid_at to determine if the stack pointer is
	 valid for a given offset, fp_valid_at for the frame pointer, or
	 choose_baseaddr to have a base register chosen for you.

	 Note that the result of (frame->stack_realign_offset
	 & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
				   + sse_reg_space_needed;
    }
  else
    {
      frame->stack_realign_offset = offset;

      if (TARGET_64BIT && m->call_ms2sysv)
	{
	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
	}

      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
	{
	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
	     required and the DRAP re-alignment boundary is at least 16 bytes,
	     then we want the SSE register save area properly aligned.  */
	  if (ix86_incoming_stack_boundary >= 128
	      || (stack_realign_drap && stack_alignment_needed >= 16))
	    offset = ROUND_UP (offset, 16);
	  offset += frame->nsseregs * 16;
	}
      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }

  /* Align start of frame for local function.  When a function call
     is removed, it may become a leaf function.  But if argument may
     be passed on stack, we need to align the stack when there is no
     tail call.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || (!crtl->tail_call_emit
	  && cfun->machine->outgoing_args_on_stack)
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if (save_regs_using_push_pop (to_allocate))
    frame->save_regs_using_mov = false;

  /* See if the red zone can absorb (part of) the frame.  */
  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !cfun->machine->asm_redzone_clobber_seen
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      /* Force the frame pointer to point at or below the lowest register save
	 area, see the SEH code in config/i386/winnt.cc for the rationale.  */
      frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;

      /* If we can leave the frame pointer where it is, do so; however return
	 the establisher frame for __builtin_frame_address (0) or else if the
	 frame overflows the SEH maximum frame size.

	 Note that the value returned by __builtin_frame_address (0) is quite
	 constrained, because setjmp is piggybacked on the SEH machinery with
	 recent versions of MinGW:

	 # elif defined(__SEH__)
	 # if defined(__aarch64__) || defined(_ARM64_)
	 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
	 # elif (__MINGW_GCC_VERSION < 40702)
	 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
	 # else
	 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
	 # endif

	 and the second argument passed to _setjmp, if not null, is forwarded
	 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
	 built an ExceptionRecord on the fly describing the setjmp buffer).  */
      const HOST_WIDE_INT diff
	= frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= 255 && !crtl->accesses_prior_frames)
	{
	  /* The resulting diff will be a multiple of 16 lower than 255,
	     i.e. at most 240 as required by the unwind data structure.  */
	  frame->hard_frame_pointer_offset += (diff & 15);
	}
      else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
      else
	frame->hard_frame_pointer_offset = frame->hfp_save_offset;
    }
}
| 7358 | |
| 7359 | /* This is semi-inlined memory_address_length, but simplified |
| 7360 | since we know that we're always dealing with reg+offset, and |
| 7361 | to avoid having to create and discard all that rtl. */ |
| 7362 | |
| 7363 | static inline int |
| 7364 | choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) |
| 7365 | { |
| 7366 | int len = 4; |
| 7367 | |
| 7368 | if (offset == 0) |
| 7369 | { |
| 7370 | /* EBP and R13 cannot be encoded without an offset. */ |
| 7371 | len = (regno == BP_REG || regno == R13_REG); |
| 7372 | } |
| 7373 | else if (IN_RANGE (offset, -128, 127)) |
| 7374 | len = 1; |
| 7375 | |
| 7376 | /* ESP and R12 must be encoded with a SIB byte. */ |
| 7377 | if (regno == SP_REG || regno == R12_REG) |
| 7378 | len++; |
| 7379 | |
| 7380 | return len; |
| 7381 | } |
| 7382 | |
| 7383 | /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in |
| 7384 | the frame save area. The register is saved at CFA - CFA_OFFSET. */ |
| 7385 | |
| 7386 | static bool |
| 7387 | sp_valid_at (HOST_WIDE_INT cfa_offset) |
| 7388 | { |
| 7389 | const struct machine_frame_state &fs = cfun->machine->fs; |
| 7390 | if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) |
| 7391 | { |
| 7392 | /* Validate that the cfa_offset isn't in a "no-man's land". */ |
| 7393 | gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); |
| 7394 | return false; |
| 7395 | } |
| 7396 | return fs.sp_valid; |
| 7397 | } |
| 7398 | |
| 7399 | /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in |
| 7400 | the frame save area. The register is saved at CFA - CFA_OFFSET. */ |
| 7401 | |
| 7402 | static inline bool |
| 7403 | fp_valid_at (HOST_WIDE_INT cfa_offset) |
| 7404 | { |
| 7405 | const struct machine_frame_state &fs = cfun->machine->fs; |
| 7406 | if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) |
| 7407 | { |
| 7408 | /* Validate that the cfa_offset isn't in a "no-man's land". */ |
| 7409 | gcc_assert (cfa_offset >= fs.sp_realigned_offset); |
| 7410 | return false; |
| 7411 | } |
| 7412 | return fs.fp_valid; |
| 7413 | } |
| 7414 | |
/* Choose a base register based upon alignment requested, speed and/or
   size.

   CFA_OFFSET is the slot being accessed, as an offset below the CFA.
   BASE_REG and BASE_OFFSET receive the chosen register and the constant
   displacement to add to it; they are left unchanged when no candidate
   register is valid for CFA_OFFSET.  ALIGN_REQESTED, if nonzero, filters
   out candidates whose guaranteed alignment is smaller.  If ALIGN is
   non-null, *ALIGN receives the alignment of whichever register ends up
   chosen (also when BASE_REG was set by a previous call).  */

static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_reqested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  bool hfp_ok = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok = sp_valid_at (cfa_offset);

  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_reqested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_reqested;
      drap_ok = drap_ok && drap_align >= align_reqested;
      sp_ok = sp_ok && sp_align >= align_reqested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  Candidates are tried in
	 SP, DRAP, FP order; the <= comparison lets a later candidate
	 win ties, giving the stated priority.  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, offset: base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), offset: toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, offset: toffset);
	  if (tlen <= len)
	    {
	      /* LEN is deliberately not updated here: FP is the last
		 candidate, so the value is no longer needed.  */
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	    }
	}
    }

  /* Set the align return value.  */
  if (align)
    {
      if (base_reg == stack_pointer_rtx)
	*align = sp_align;
      else if (base_reg == crtl->drap_reg)
	*align = drap_align;
      else if (base_reg == hard_frame_pointer_rtx)
	*align = hfp_align;
    }
}
| 7520 | |
| 7521 | /* Return an RTX that points to CFA_OFFSET within the stack frame and |
| 7522 | the alignment of address. If ALIGN is non-null, it should point to |
| 7523 | an alignment value (in bits) that is preferred or zero and will |
| 7524 | recieve the alignment of the base register that was selected, |
| 7525 | irrespective of rather or not CFA_OFFSET is a multiple of that |
| 7526 | alignment value. If it is possible for the base register offset to be |
| 7527 | non-immediate then SCRATCH_REGNO should specify a scratch register to |
| 7528 | use. |
| 7529 | |
| 7530 | The valid base registers are taken from CFUN->MACHINE->FS. */ |
| 7531 | |
| 7532 | static rtx |
| 7533 | choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, |
| 7534 | unsigned int scratch_regno = INVALID_REGNUM) |
| 7535 | { |
| 7536 | rtx base_reg = NULL; |
| 7537 | HOST_WIDE_INT base_offset = 0; |
| 7538 | |
| 7539 | /* If a specific alignment is requested, try to get a base register |
| 7540 | with that alignment first. */ |
| 7541 | if (align && *align) |
| 7542 | choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: *align, align); |
| 7543 | |
| 7544 | if (!base_reg) |
| 7545 | choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: 0, align); |
| 7546 | |
| 7547 | gcc_assert (base_reg != NULL); |
| 7548 | |
| 7549 | rtx base_offset_rtx = GEN_INT (base_offset); |
| 7550 | |
| 7551 | if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) |
| 7552 | { |
| 7553 | gcc_assert (scratch_regno != INVALID_REGNUM); |
| 7554 | |
| 7555 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
| 7556 | emit_move_insn (scratch_reg, base_offset_rtx); |
| 7557 | |
| 7558 | return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); |
| 7559 | } |
| 7560 | |
| 7561 | return plus_constant (Pmode, base_reg, base_offset); |
| 7562 | } |
| 7563 | |
/* Emit code to save registers in the prologue.  General registers are
   pushed in descending regno order; with APX PUSH2/POP2 available,
   pairs of saves are combined into a single push2 whenever the stack
   pointer is 16-byte aligned.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx_insn *insn;
  /* The PPX hint is not used when the function calls
     __builtin_eh_return.  */
  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;

  if (!TARGET_APX_PUSH2POP2
      || !ix86_can_use_push2pop2 ()
      || cfun->machine->func_type != TYPE_NORMAL)
    {
      /* No push2 available: one ordinary push per saved register.  */
      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
	  {
	    insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno),
					ppx_p: use_ppx));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  else
    {
      /* Pending registers to pair into a push2; -1 marks an empty slot.  */
      int regno_list[2];
      regno_list[0] = regno_list[1] = -1;
      int loaded_regnum = 0;
      /* push2 is only emitted while the stack pointer is 16-byte
	 aligned (see the 128b requirement in frame layout).  */
      bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
	  {
	    if (aligned)
	      {
		/* Collect registers two at a time; emit a push2 when a
		   pair is complete.  */
		regno_list[loaded_regnum++] = regno;
		if (loaded_regnum == 2)
		  {
		    gcc_assert (regno_list[0] != -1
				&& regno_list[1] != -1
				&& regno_list[0] != regno_list[1]);
		    const int offset = UNITS_PER_WORD * 2;
		    rtx mem = gen_rtx_MEM (TImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx));
		    insn = emit_insn (gen_push2 (mem,
						 reg1: gen_rtx_REG (word_mode,
							    regno_list[0]),
						 reg2: gen_rtx_REG (word_mode,
							    regno_list[1]),
						 ppx_p: use_ppx));
		    RTX_FRAME_RELATED_P (insn) = 1;
		    /* Describe the push2 to the unwinder as two word-sized
		       stores plus one stack-pointer adjustment.  */
		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));

		    for (int i = 0; i < 2; i++)
		      {
			rtx dwarf_reg = gen_rtx_REG (word_mode,
						     regno_list[i]);
			rtx sp_offset = plus_constant (Pmode,
						       stack_pointer_rtx,
						       + UNITS_PER_WORD
						       * (1 - i));
			rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
							      sp_offset),
					       dwarf_reg);
			RTX_FRAME_RELATED_P (tmp) = 1;
			XVECEXP (dwarf, 0, i + 1) = tmp;
		      }
		    rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
					      plus_constant (Pmode,
							     stack_pointer_rtx,
							     -offset));
		    RTX_FRAME_RELATED_P (sp_tmp) = 1;
		    XVECEXP (dwarf, 0, 0) = sp_tmp;
		    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

		    /* Reset the pair buffer for the next two saves.  */
		    loaded_regnum = 0;
		    regno_list[0] = regno_list[1] = -1;
		  }
	      }
	    else
	      {
		/* Stack pointer not yet 16-byte aligned: emit a single
		   push, after which it is (one word was subtracted).  */
		insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno),
					    ppx_p: use_ppx));
		RTX_FRAME_RELATED_P (insn) = 1;
		aligned = true;
	      }
	  }
      /* An odd register left over after pairing gets a single push.  */
      if (loaded_regnum == 1)
	{
	  insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode,
						 regno_list[0]),
				      ppx_p: use_ppx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
| 7659 | |
/* Emit a single register save at CFA - CFA_OFFSET.  MODE is the mode
   in which REGNO is stored (word_mode for general registers, V4SFmode
   for SSE registers, per the callers below).  Attaches whatever CFA
   note the unwinder needs when the chosen base register is not the
   current CFA register.  */

static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Pick the best valid base register for this offset, preferring one
     aligned to the mode's natural alignment.  */
  addr = choose_baseaddr (cfa_offset, align: &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* Strip the displacement to recover the base register itself.  */
  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  /* Saving through a realigned stack pointer: express the location
     with a CFA expression note instead of a plain offset.  */
  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
| 7734 | |
| 7735 | /* Emit code to save registers using MOV insns. |
| 7736 | First register is stored at CFA - CFA_OFFSET. */ |
| 7737 | static void |
| 7738 | ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) |
| 7739 | { |
| 7740 | unsigned int regno; |
| 7741 | |
| 7742 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 7743 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
| 7744 | { |
| 7745 | /* Skip registers, already processed by shrink wrap separate. */ |
| 7746 | if (!cfun->machine->reg_is_wrapped_separately[regno]) |
| 7747 | ix86_emit_save_reg_using_mov (mode: word_mode, regno, cfa_offset); |
| 7748 | cfa_offset -= UNITS_PER_WORD; |
| 7749 | } |
| 7750 | } |
| 7751 | |
| 7752 | /* Emit code to save SSE registers using MOV insns. |
| 7753 | First register is stored at CFA - CFA_OFFSET. */ |
| 7754 | static void |
| 7755 | ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) |
| 7756 | { |
| 7757 | unsigned int regno; |
| 7758 | |
| 7759 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 7760 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
| 7761 | { |
| 7762 | ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); |
| 7763 | cfa_offset -= GET_MODE_SIZE (V4SFmode); |
| 7764 | } |
| 7765 | } |
| 7766 | |
| 7767 | static GTY(()) rtx queued_cfa_restores; |
| 7768 | |
| 7769 | /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack |
| 7770 | manipulation insn. The value is on the stack at CFA - CFA_OFFSET. |
| 7771 | Don't add the note if the previously saved value will be left untouched |
| 7772 | within stack red-zone till return, as unwinders can find the same value |
| 7773 | in the register and on the stack. */ |
| 7774 | |
| 7775 | static void |
| 7776 | ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) |
| 7777 | { |
| 7778 | if (!crtl->shrink_wrapped |
| 7779 | && cfa_offset <= cfun->machine->fs.red_zone_offset) |
| 7780 | return; |
| 7781 | |
| 7782 | if (insn) |
| 7783 | { |
| 7784 | add_reg_note (insn, REG_CFA_RESTORE, reg); |
| 7785 | RTX_FRAME_RELATED_P (insn) = 1; |
| 7786 | } |
| 7787 | else |
| 7788 | queued_cfa_restores |
| 7789 | = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); |
| 7790 | } |
| 7791 | |
| 7792 | /* Add queued REG_CFA_RESTORE notes if any to INSN. */ |
| 7793 | |
| 7794 | static void |
| 7795 | ix86_add_queued_cfa_restore_notes (rtx insn) |
| 7796 | { |
| 7797 | rtx last; |
| 7798 | if (!queued_cfa_restores) |
| 7799 | return; |
| 7800 | for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) |
| 7801 | ; |
| 7802 | XEXP (last, 1) = REG_NOTES (insn); |
| 7803 | REG_NOTES (insn) = queued_cfa_restores; |
| 7804 | queued_cfa_restores = NULL_RTX; |
| 7805 | RTX_FRAME_RELATED_P (insn) = 1; |
| 7806 | } |
| 7807 | |
/* Expand prologue or epilogue stack adjustment: DEST = SRC + OFFSET.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  When SET_CFA is true, the CFA register/offset tracked in
   cfun->machine->fs is moved from SRC to DEST and a REG_CFA_ADJUST_CFA
   note is attached.  Returns the emitted adjustment insn.  */

static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx insn;
  bool add_frame_related_expr = false;

  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* OFFSET does not fit a sign-extended 32-bit immediate; materialize
	 it in a scratch register first.
	 r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  /* %r11 is live (STYLE == 0): fall back to the hard frame
	     pointer, which must then not be a source or destination.  */
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  /* Shrink wrap separate may insert prologue between TEST and JMP.  In order
     not to affect EFlags, emit add without reg clobbering.  */
  if (crtl->shrink_wrapped_separate)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
		      (Pmode, x0: dest, x1: src, x2: addend));
  else
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		      (Pmode, x0: dest, x1: src, x2: addend));

  /* A stack manipulation insn is a valid anchor for queued restores.  */
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      /* The CFA must currently be based on SRC for this move to be
	 meaningful; record the new base and offset.  */
      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  /* The actual insn used a scratch register; tell the unwinder
	     what the net effect on the frame is.  */
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      /* Update the tracked stack-pointer state.  Default to SP's own
	 current state, overridden below by the source register.  */
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of this by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
| 7909 | |
| 7910 | /* Find an available register to be used as dynamic realign argument |
| 7911 | pointer regsiter. Such a register will be written in prologue and |
| 7912 | used in begin of body, so it must not be |
| 7913 | 1. parameter passing register. |
| 7914 | 2. GOT pointer. |
| 7915 | We reuse static-chain register if it is available. Otherwise, we |
| 7916 | use DI for i386 and R13 for x86-64. We chose R13 since it has |
| 7917 | shorter encoding. |
| 7918 | |
| 7919 | Return: the regno of chosen register. */ |
| 7920 | |
| 7921 | static unsigned int |
| 7922 | find_drap_reg (void) |
| 7923 | { |
| 7924 | tree decl = cfun->decl; |
| 7925 | |
| 7926 | /* Always use callee-saved register if there are no caller-saved |
| 7927 | registers. */ |
| 7928 | if (TARGET_64BIT) |
| 7929 | { |
| 7930 | /* Use R13 for nested function or function need static chain. |
| 7931 | Since function with tail call may use any caller-saved |
| 7932 | registers in epilogue, DRAP must not use caller-saved |
| 7933 | register in such case. */ |
| 7934 | if (DECL_STATIC_CHAIN (decl) |
| 7935 | || (cfun->machine->call_saved_registers |
| 7936 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
| 7937 | || crtl->tail_call_emit) |
| 7938 | return R13_REG; |
| 7939 | |
| 7940 | return R10_REG; |
| 7941 | } |
| 7942 | else |
| 7943 | { |
| 7944 | /* Use DI for nested function or function need static chain. |
| 7945 | Since function with tail call may use any caller-saved |
| 7946 | registers in epilogue, DRAP must not use caller-saved |
| 7947 | register in such case. */ |
| 7948 | if (DECL_STATIC_CHAIN (decl) |
| 7949 | || (cfun->machine->call_saved_registers |
| 7950 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
| 7951 | || crtl->tail_call_emit |
| 7952 | || crtl->calls_eh_return) |
| 7953 | return DI_REG; |
| 7954 | |
| 7955 | /* Reuse static chain register if it isn't used for parameter |
| 7956 | passing. */ |
| 7957 | if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) |
| 7958 | { |
| 7959 | unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); |
| 7960 | if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) |
| 7961 | return CX_REG; |
| 7962 | } |
| 7963 | return DI_REG; |
| 7964 | } |
| 7965 | } |
| 7966 | |
| 7967 | /* Return minimum incoming stack alignment. */ |
| 7968 | |
| 7969 | static unsigned int |
| 7970 | ix86_minimum_incoming_stack_boundary (bool sibcall) |
| 7971 | { |
| 7972 | unsigned int incoming_stack_boundary; |
| 7973 | |
| 7974 | /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */ |
| 7975 | if (cfun->machine->func_type != TYPE_NORMAL) |
| 7976 | incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY; |
| 7977 | /* Prefer the one specified at command line. */ |
| 7978 | else if (ix86_user_incoming_stack_boundary) |
| 7979 | incoming_stack_boundary = ix86_user_incoming_stack_boundary; |
| 7980 | /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary |
| 7981 | if -mstackrealign is used, it isn't used for sibcall check and |
| 7982 | estimated stack alignment is 128bit. */ |
| 7983 | else if (!sibcall |
| 7984 | && ix86_force_align_arg_pointer |
| 7985 | && crtl->stack_alignment_estimated == 128) |
| 7986 | incoming_stack_boundary = MIN_STACK_BOUNDARY; |
| 7987 | else |
| 7988 | incoming_stack_boundary = ix86_default_incoming_stack_boundary; |
| 7989 | |
| 7990 | /* Incoming stack alignment can be changed on individual functions |
| 7991 | via force_align_arg_pointer attribute. We use the smallest |
| 7992 | incoming stack boundary. */ |
| 7993 | if (incoming_stack_boundary > MIN_STACK_BOUNDARY |
| 7994 | && lookup_attribute (attr_name: "force_align_arg_pointer" , |
| 7995 | TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) |
| 7996 | incoming_stack_boundary = MIN_STACK_BOUNDARY; |
| 7997 | |
| 7998 | /* The incoming stack frame has to be aligned at least at |
| 7999 | parm_stack_boundary. */ |
| 8000 | if (incoming_stack_boundary < crtl->parm_stack_boundary) |
| 8001 | incoming_stack_boundary = crtl->parm_stack_boundary; |
| 8002 | |
| 8003 | /* Stack at entrance of main is aligned by runtime. We use the |
| 8004 | smallest incoming stack boundary. */ |
| 8005 | if (incoming_stack_boundary > MAIN_STACK_BOUNDARY |
| 8006 | && DECL_NAME (current_function_decl) |
| 8007 | && MAIN_NAME_P (DECL_NAME (current_function_decl)) |
| 8008 | && DECL_FILE_SCOPE_P (current_function_decl)) |
| 8009 | incoming_stack_boundary = MAIN_STACK_BOUNDARY; |
| 8010 | |
| 8011 | return incoming_stack_boundary; |
| 8012 | } |
| 8013 | |
| 8014 | /* Update incoming stack boundary and estimated stack alignment. */ |
| 8015 | |
| 8016 | static void |
| 8017 | ix86_update_stack_boundary (void) |
| 8018 | { |
| 8019 | ix86_incoming_stack_boundary |
| 8020 | = ix86_minimum_incoming_stack_boundary (sibcall: false); |
| 8021 | |
| 8022 | /* x86_64 vararg needs 16byte stack alignment for register save area. */ |
| 8023 | if (TARGET_64BIT |
| 8024 | && cfun->stdarg |
| 8025 | && crtl->stack_alignment_estimated < 128) |
| 8026 | crtl->stack_alignment_estimated = 128; |
| 8027 | |
| 8028 | /* __tls_get_addr needs to be called with 16-byte aligned stack. */ |
| 8029 | if (ix86_tls_descriptor_calls_expanded_in_cfun |
| 8030 | && crtl->preferred_stack_boundary < 128) |
| 8031 | crtl->preferred_stack_boundary = 128; |
| 8032 | |
| 8033 | /* For 32-bit MS ABI, both the incoming and preferred stack boundaries |
| 8034 | are 32 bits, but if force_align_arg_pointer is specified, it should |
| 8035 | prefer 128 bits for a backward-compatibility reason, which is also |
| 8036 | what the doc suggests. */ |
| 8037 | if (lookup_attribute (attr_name: "force_align_arg_pointer" , |
| 8038 | TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) |
| 8039 | && crtl->preferred_stack_boundary < 128) |
| 8040 | crtl->preferred_stack_boundary = 128; |
| 8041 | } |
| 8042 | |
| 8043 | /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is |
| 8044 | needed or an rtx for DRAP otherwise. */ |
| 8045 | |
| 8046 | static rtx |
| 8047 | ix86_get_drap_rtx (void) |
| 8048 | { |
| 8049 | /* We must use DRAP if there are outgoing arguments on stack or |
| 8050 | the stack pointer register is clobbered by asm statement and |
| 8051 | ACCUMULATE_OUTGOING_ARGS is false. */ |
| 8052 | if (ix86_force_drap |
| 8053 | || ((cfun->machine->outgoing_args_on_stack |
| 8054 | || crtl->sp_is_clobbered_by_asm) |
| 8055 | && !ACCUMULATE_OUTGOING_ARGS)) |
| 8056 | crtl->need_drap = true; |
| 8057 | |
| 8058 | if (stack_realign_drap) |
| 8059 | { |
| 8060 | /* Assign DRAP to vDRAP and returns vDRAP */ |
| 8061 | unsigned int regno = find_drap_reg (); |
| 8062 | rtx drap_vreg; |
| 8063 | rtx arg_ptr; |
| 8064 | rtx_insn *seq, *insn; |
| 8065 | |
| 8066 | arg_ptr = gen_rtx_REG (Pmode, regno); |
| 8067 | crtl->drap_reg = arg_ptr; |
| 8068 | |
| 8069 | start_sequence (); |
| 8070 | drap_vreg = copy_to_reg (arg_ptr); |
| 8071 | seq = end_sequence (); |
| 8072 | |
| 8073 | insn = emit_insn_before (seq, NEXT_INSN (insn: entry_of_function ())); |
| 8074 | if (!optimize) |
| 8075 | { |
| 8076 | add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); |
| 8077 | RTX_FRAME_RELATED_P (insn) = 1; |
| 8078 | } |
| 8079 | return drap_vreg; |
| 8080 | } |
| 8081 | else |
| 8082 | return NULL; |
| 8083 | } |
| 8084 | |
| 8085 | /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ |
| 8086 | |
| 8087 | static rtx |
| 8088 | ix86_internal_arg_pointer (void) |
| 8089 | { |
| 8090 | return virtual_incoming_args_rtx; |
| 8091 | } |
| 8092 | |
/* A scratch register handed out by get_scratch_register_on_entry and
   released by release_scratch_register_on_entry.  */
struct scratch_reg {
  rtx reg;	/* The scratch register itself.  */
  bool saved;	/* True if REG was live and had to be pushed first.  */
};
| 8097 | |
/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.

   SR->reg receives the chosen register; SR->saved is set when the
   register was live and had to be pushed here (the release function
   must then restore it).  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      /* 32-bit mode: pick a register that neither the calling
	 convention, the static chain, nor DRAP is using.  */
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute (attr_name: "fastcall" , TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute (attr_name: "thiscall" , TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (type: fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      /* Otherwise fall back to a callee-saved register the prologue
	 saves anyway, so using it needs no extra spill.  */
      else if (ix86_save_reg (BX_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = DI_REG;
      else
	{
	  /* No free register: spill one (avoiding the DRAP register)
	     and remember that it must be restored.  */
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (arg: sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
| 8165 | |
/* Release a scratch register obtained from the preceding function.

   If RELEASE_VIA_POP is true, we just pop the register off the stack
   to release it.  This is what non-Linux systems use with -fstack-check.

   Otherwise we use OFFSET to locate the saved register and the
   allocated stack space becomes part of the local frame and is
   deallocated by the epilogue.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
				   bool release_via_pop)
{
  /* Nothing to undo unless the register was actually spilled.  */
  if (sr->saved)
    {
      if (release_via_pop)
	{
	  struct machine_function *m = cfun->machine;
	  rtx x, insn = emit_insn (gen_pop (arg: sr->reg));

	  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop,
	     so describe the stack adjustment explicitly.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
	  m->fs.sp_offset -= UNITS_PER_WORD;
	}
      else
	{
	  /* Reload the register from its stack slot; the slot itself is
	     freed later by the epilogue.  */
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
	  emit_insn (x);
	}
    }
}
| 8201 | |
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.

   If INT_REGISTERS_SAVED is true, then integer registers have already been
   pushed on the stack.

   If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
   beyond SIZE bytes.

   This assumes no knowledge of the current probing state, i.e. it is never
   allowed to allocate more than PROBE_INTERVAL bytes of stack space without
   a suitable probe.  */

static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved,
			     const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (arg: dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (arg: dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* Describe the push/pop pair to the unwinder: SP dips by one
	     word and comes straight back.  */
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }

  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), style: -1,
				 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), style: -1,
					 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual...  (RESIDUAL is <= 0 here: the loop
	 overshot SIZE by PROBE_INTERVAL - residual bytes.)  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (residual), style: -1,
				     set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

	  /* ...except if there is a protection area to maintain.  */
	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (sr: &sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* While the loop runs, express the CFA relative to the loop
	     counter register, which tracks SP's final value.  */
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, x0: sr.reg, x1: sr.reg,
						    x2: size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* After the loop SP has moved; re-anchor the CFA on SP.  */
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (rounded_size - size), style: -1,
				     set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (probe_interval + dope), style: -1,
			       set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
| 8419 | |
/* Adjust the stack pointer up to REG while probing it.  Emits the
   assembly for the runtime loop generated by
   ix86_adjust_stack_and_probe.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL" , labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP - PROBE_INTERVAL (the stack grows downward, so the
     adjustment is a subtraction).  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}" , xops);

  /* Probe at SP (an OR of zero touches the word without changing it).  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}" , xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}" , xops);

  /* Branch.  */
  fputs (s: "\tjne\t" , stream: asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc (c: '\n', stream: asm_out_file);

  return "" ;
}
| 8455 | |
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (sr: &sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  (Stored negated: the scratch register
	 holds a downward offset from SP.)  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn
	(gen_probe_stack_range (Pmode, x0: sr.reg, x1: sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      /* Pop the scratch register back off the stack.  */
      release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
| 8552 | |
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  Emits the assembly for the
   runtime loop generated by ix86_emit_probe_stack_range.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL" , labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR - PROBE_INTERVAL (REG holds a negated
     offset, so `sub' moves it one interval further down the stack).  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}" , xops);

  /* Probe at TEST_ADDR = SP + REG (an OR of zero touches the word
     without changing it).  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}" , xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}" , xops);

  /* Branch.  */
  fputs (s: "\tjne\t" , stream: asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc (c: '\n', stream: asm_out_file);

  return "" ;
}
| 8591 | |
/* Data passed to ix86_update_stack_alignment.  */
struct stack_access_data
{
  /* The stack access register: stack pointer, frame pointer, or a
     register defined from one of them.  */
  const_rtx reg;
  /* Pointer to the maximum stack slot alignment found so far, in bits;
     updated in place as matching memory references are scanned.  */
  unsigned int *stack_alignment;
};
| 8600 | |
| 8601 | /* Update the maximum stack slot alignment from memory alignment in PAT. */ |
| 8602 | |
| 8603 | static void |
| 8604 | ix86_update_stack_alignment (rtx, const_rtx pat, void *data) |
| 8605 | { |
| 8606 | /* This insn may reference stack slot. Update the maximum stack slot |
| 8607 | alignment if the memory is referenced by the stack access register. */ |
| 8608 | stack_access_data *p = (stack_access_data *) data; |
| 8609 | |
| 8610 | subrtx_iterator::array_type array; |
| 8611 | FOR_EACH_SUBRTX (iter, array, pat, ALL) |
| 8612 | { |
| 8613 | auto op = *iter; |
| 8614 | if (MEM_P (op)) |
| 8615 | { |
| 8616 | if (reg_mentioned_p (p->reg, XEXP (op, 0))) |
| 8617 | { |
| 8618 | unsigned int alignment = MEM_ALIGN (op); |
| 8619 | |
| 8620 | if (alignment > *p->stack_alignment) |
| 8621 | *p->stack_alignment = alignment; |
| 8622 | break; |
| 8623 | } |
| 8624 | else |
| 8625 | iter.skip_subrtxes (); |
| 8626 | } |
| 8627 | } |
| 8628 | } |
| 8629 | |
| 8630 | /* Helper function for ix86_find_all_reg_uses. */ |
| 8631 | |
| 8632 | static void |
| 8633 | ix86_find_all_reg_uses_1 (HARD_REG_SET ®set, |
| 8634 | rtx set, unsigned int regno, |
| 8635 | auto_bitmap &worklist) |
| 8636 | { |
| 8637 | rtx dest = SET_DEST (set); |
| 8638 | |
| 8639 | if (!REG_P (dest)) |
| 8640 | return; |
| 8641 | |
| 8642 | /* Reject non-Pmode modes. */ |
| 8643 | if (GET_MODE (dest) != Pmode) |
| 8644 | return; |
| 8645 | |
| 8646 | unsigned int dst_regno = REGNO (dest); |
| 8647 | |
| 8648 | if (TEST_HARD_REG_BIT (set: regset, bit: dst_regno)) |
| 8649 | return; |
| 8650 | |
| 8651 | const_rtx src = SET_SRC (set); |
| 8652 | |
| 8653 | subrtx_iterator::array_type array; |
| 8654 | FOR_EACH_SUBRTX (iter, array, src, ALL) |
| 8655 | { |
| 8656 | auto op = *iter; |
| 8657 | |
| 8658 | if (MEM_P (op)) |
| 8659 | iter.skip_subrtxes (); |
| 8660 | |
| 8661 | if (REG_P (op) && REGNO (op) == regno) |
| 8662 | { |
| 8663 | /* Add this register to register set. */ |
| 8664 | add_to_hard_reg_set (regs: ®set, Pmode, regno: dst_regno); |
| 8665 | bitmap_set_bit (worklist, dst_regno); |
| 8666 | break; |
| 8667 | } |
| 8668 | } |
| 8669 | } |
| 8670 | |
| 8671 | /* Find all registers defined with register REGNO. */ |
| 8672 | |
| 8673 | static void |
| 8674 | ix86_find_all_reg_uses (HARD_REG_SET ®set, |
| 8675 | unsigned int regno, auto_bitmap &worklist) |
| 8676 | { |
| 8677 | for (df_ref ref = DF_REG_USE_CHAIN (regno); |
| 8678 | ref != NULL; |
| 8679 | ref = DF_REF_NEXT_REG (ref)) |
| 8680 | { |
| 8681 | if (DF_REF_IS_ARTIFICIAL (ref)) |
| 8682 | continue; |
| 8683 | |
| 8684 | rtx_insn *insn = DF_REF_INSN (ref); |
| 8685 | |
| 8686 | if (!NONJUMP_INSN_P (insn)) |
| 8687 | continue; |
| 8688 | |
| 8689 | unsigned int ref_regno = DF_REF_REGNO (ref); |
| 8690 | |
| 8691 | rtx set = single_set (insn); |
| 8692 | if (set) |
| 8693 | { |
| 8694 | ix86_find_all_reg_uses_1 (regset, set, |
| 8695 | regno: ref_regno, worklist); |
| 8696 | continue; |
| 8697 | } |
| 8698 | |
| 8699 | rtx pat = PATTERN (insn); |
| 8700 | if (GET_CODE (pat) != PARALLEL) |
| 8701 | continue; |
| 8702 | |
| 8703 | for (int i = 0; i < XVECLEN (pat, 0); i++) |
| 8704 | { |
| 8705 | rtx exp = XVECEXP (pat, 0, i); |
| 8706 | |
| 8707 | if (GET_CODE (exp) == SET) |
| 8708 | ix86_find_all_reg_uses_1 (regset, set: exp, |
| 8709 | regno: ref_regno, worklist); |
| 8710 | } |
| 8711 | } |
| 8712 | } |
| 8713 | |
/* Set stack_frame_required to false if stack frame isn't required.
   Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
   slot used if stack frame is required and CHECK_STACK_SLOT is true.  */

static void
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
				    bool check_stack_slot)
{
  HARD_REG_SET set_up_by_prologue, prologue_used;
  basic_block bb;

  /* Registers the prologue is expected to set up; an insn that needs
     any of them requires a stack frame.  */
  CLEAR_HARD_REG_SET (set&: prologue_used);
  CLEAR_HARD_REG_SET (set&: set_up_by_prologue);
  add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
  add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode,
		       HARD_FRAME_POINTER_REGNUM);

  bool require_stack_frame = false;

  /* Scan all insns; a single insn requiring a frame is enough.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue))
	  {
	    require_stack_frame = true;
	    break;
	  }
    }

  cfun->machine->stack_frame_required = require_stack_frame;

  /* Stop if we don't need to check stack slot.  */
  if (!check_stack_slot)
    return;

  /* The preferred stack alignment is the minimum stack alignment.  */
  if (stack_alignment > crtl->preferred_stack_boundary)
    stack_alignment = crtl->preferred_stack_boundary;

  HARD_REG_SET stack_slot_access;
  CLEAR_HARD_REG_SET (set&: stack_slot_access);

  /* Stack slot can be accessed by stack pointer, frame pointer or
     registers defined by stack pointer or frame pointer.  */
  auto_bitmap worklist;

  add_to_hard_reg_set (regs: &stack_slot_access, Pmode, STACK_POINTER_REGNUM);
  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);

  if (frame_pointer_needed)
    {
      add_to_hard_reg_set (regs: &stack_slot_access, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
    }

  unsigned int regno;

  /* Transitive closure: a register defined from a register already in
     the set may itself be used to address stack slots.  */
  do
    {
      regno = bitmap_clear_first_set_bit (worklist);
      ix86_find_all_reg_uses (regset&: stack_slot_access, regno, worklist);
    }
  while (!bitmap_empty_p (map: worklist));

  hard_reg_set_iterator hrsi;
  stack_access_data data;

  data.stack_alignment = &stack_alignment;

  /* For every use of a stack access register, fold the alignment of any
     memory it addresses into STACK_ALIGNMENT (see
     ix86_update_stack_alignment).  */
  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
    for (df_ref ref = DF_REG_USE_CHAIN (regno);
	 ref != NULL;
	 ref = DF_REF_NEXT_REG (ref))
      {
	if (DF_REF_IS_ARTIFICIAL (ref))
	  continue;

	rtx_insn *insn = DF_REF_INSN (ref);

	if (!NONJUMP_INSN_P (insn))
	  continue;

	data.reg = DF_REF_REG (ref);
	note_stores (insn, ix86_update_stack_alignment, &data);
      }
}
| 8805 | |
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
   will guide prologue/epilogue to be generated in correct form.  */

static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     stores result in cfun */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if 128-bit aligned load/store may be generated
     on misaligned stack slot which will lead to segfault.  */
  bool check_stack_slot
    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
  ix86_find_max_used_stack_alignment (stack_alignment,
				      check_stack_slot);

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment had
     been spilled nor stack access, clear frame_pointer_needed and say we
     don't need stack realignment.

     When vector register is used for piecewise move and store, we don't
     increase stack_alignment_needed as there is no register spill for
     piecewise move and store.  Since stack_realign_needed is set to true
     by checking stack_alignment_estimated which is updated by pseudo
     vector register usage, we also need to check stack_realign_needed to
     eliminate frame pointer.  */
  if ((stack_realign
       || (!flag_omit_frame_pointer && optimize)
       || crtl->stack_realign_needed)
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
	   && flag_stack_check
	   && flag_exceptions
	   && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && ix86_get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      if (cfun->machine->stack_frame_required)
	{
	  /* Stack frame is required.  If stack alignment needed is less
	     than incoming stack boundary, don't realign stack.  */
	  stack_realign = incoming_stack_boundary < stack_alignment;
	  if (!stack_realign)
	    {
	      crtl->max_used_stack_slot_alignment
		= incoming_stack_boundary;
	      crtl->stack_alignment_needed
		= incoming_stack_boundary;
	      /* Also update preferred_stack_boundary for leaf
		 functions.  */
	      crtl->preferred_stack_boundary
		= incoming_stack_boundary;
	    }
	}
      else
	{
	  /* If drap has been set, but it actually isn't live at the
	     start of the function, there is no reason to set it up.  */
	  if (crtl->drap_reg)
	    {
	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
				     REGNO (crtl->drap_reg)))
		{
		  crtl->drap_reg = NULL_RTX;
		  crtl->need_drap = false;
		}
	    }
	  else
	    cfun->machine->no_drap_save_restore = true;

	  frame_pointer_needed = false;
	  stack_realign = false;
	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
	  crtl->stack_alignment_needed = incoming_stack_boundary;
	  crtl->stack_alignment_estimated = incoming_stack_boundary;
	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	    crtl->preferred_stack_boundary = incoming_stack_boundary;
	  /* Eliminating the frame pointer invalidates liveness and
	     regs_ever_live information: re-run dataflow analysis.  */
	  df_finish_pass (true);
	  df_scan_alloc (NULL);
	  df_scan_blocks ();
	  df_compute_regs_ever_live (true);
	  df_analyze ();

	  if (flag_var_tracking)
	    {
	      /* Since frame pointer is no longer available, replace it with
		 stack pointer - UNITS_PER_WORD in debug insns.  */
	      df_ref ref, next;
	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
		   ref; ref = next)
		{
		  next = DF_REF_NEXT_REG (ref);
		  if (!DF_REF_INSN_INFO (ref))
		    continue;

		  /* Make sure the next ref is for a different instruction,
		     so that we're not affected by the rescan.  */
		  rtx_insn *insn = DF_REF_INSN (ref);
		  while (next && DF_REF_INSN (next) == insn)
		    next = DF_REF_NEXT_REG (next);

		  if (DEBUG_INSN_P (insn))
		    {
		      bool changed = false;
		      /* Patch every frame-pointer use in this debug insn,
			 then rescan it once.  */
		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
			{
			  rtx *loc = DF_REF_LOC (ref);
			  if (*loc == hard_frame_pointer_rtx)
			    {
			      *loc = plus_constant (Pmode,
						    stack_pointer_rtx,
						    -UNITS_PER_WORD);
			      changed = true;
			    }
			}
		      if (changed)
			df_insn_rescan (insn);
		    }
		}
	    }

	  recompute_frame_layout_p = true;
	}
    }
  else if (crtl->max_used_stack_slot_alignment >= 128
	   && cfun->machine->stack_frame_required)
    {
      /* We don't need to realign stack.  max_used_stack_alignment is
	 used to decide how stack frame should be aligned.  This is
	 independent of any psABIs nor 32-bit vs 64-bit.  */
      cfun->machine->max_used_stack_alignment
	= stack_alignment / BITS_PER_UNIT;
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
| 8978 | |
| 8979 | /* Delete SET_GOT right after entry block if it is allocated to reg. */ |
| 8980 | |
| 8981 | static void |
| 8982 | ix86_elim_entry_set_got (rtx reg) |
| 8983 | { |
| 8984 | basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; |
| 8985 | rtx_insn *c_insn = BB_HEAD (bb); |
| 8986 | if (!NONDEBUG_INSN_P (c_insn)) |
| 8987 | c_insn = next_nonnote_nondebug_insn (c_insn); |
| 8988 | if (c_insn && NONJUMP_INSN_P (c_insn)) |
| 8989 | { |
| 8990 | rtx pat = PATTERN (insn: c_insn); |
| 8991 | if (GET_CODE (pat) == PARALLEL) |
| 8992 | { |
| 8993 | rtx set = XVECEXP (pat, 0, 0); |
| 8994 | if (GET_CODE (set) == SET |
| 8995 | && GET_CODE (SET_SRC (set)) == UNSPEC |
| 8996 | && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT |
| 8997 | && REGNO (SET_DEST (set)) == REGNO (reg)) |
| 8998 | delete_insn (c_insn); |
| 8999 | } |
| 9000 | } |
| 9001 | } |
| 9002 | |
| 9003 | static rtx |
| 9004 | gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) |
| 9005 | { |
| 9006 | rtx addr, mem; |
| 9007 | |
| 9008 | if (offset) |
| 9009 | addr = plus_constant (Pmode, frame_reg, offset); |
| 9010 | mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); |
| 9011 | return gen_rtx_SET (store ? mem : reg, store ? reg : mem); |
| 9012 | } |
| 9013 | |
/* Return a SET rtx loading REG from the frame slot at
   FRAME_REG + OFFSET.  */

static inline rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, store: false);
}
| 9019 | |
/* Return a SET rtx storing REG into the frame slot at
   FRAME_REG + OFFSET.  */

static inline rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, store: true);
}
| 9025 | |
/* Emit the insn performing the out-of-line ms2sysv register save:
   a PARALLEL of the stub USE plus one frame store per saved register,
   with RAX as the stub's base pointer.  */

static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  /* Registers the stub saves: the fixed MS-clobbered set plus any
     extra registers requested for this function.  */
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  /* One element per saved register, plus one for the stub USE.  */
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Set up RAX as the stub's base pointer.  Use stack_realign_offset,
     which is valid whether or not we've actually realigned the
     stack.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (cfa_offset: frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), align: &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol; a hard-frame-pointer variant is used when the
     frame pointer is set up.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
			     : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  /* Describe each register store performed by the stub; SSE registers
     are saved in V4SFmode, the rest in word_mode.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, frame_reg: rax, offset: -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
| 9070 | |
| 9071 | /* Generate and return an insn body to AND X with Y. */ |
| 9072 | |
| 9073 | static rtx_insn * |
| 9074 | gen_and2_insn (rtx x, rtx y) |
| 9075 | { |
| 9076 | enum insn_code icode = optab_handler (op: and_optab, GET_MODE (x)); |
| 9077 | |
| 9078 | gcc_assert (insn_operand_matches (icode, 0, x)); |
| 9079 | gcc_assert (insn_operand_matches (icode, 1, x)); |
| 9080 | gcc_assert (insn_operand_matches (icode, 2, y)); |
| 9081 | |
| 9082 | return GEN_FCN (icode) (x, x, y); |
| 9083 | } |
| 9084 | |
| 9085 | /* Expand the prologue into a bunch of separate insns. */ |
| 9086 | |
| 9087 | void |
| 9088 | ix86_expand_prologue (void) |
| 9089 | { |
| 9090 | struct machine_function *m = cfun->machine; |
| 9091 | rtx insn, t; |
| 9092 | HOST_WIDE_INT allocate; |
| 9093 | bool int_registers_saved; |
| 9094 | bool sse_registers_saved; |
| 9095 | bool save_stub_call_needed; |
| 9096 | rtx static_chain = NULL_RTX; |
| 9097 | |
| 9098 | ix86_last_zero_store_uid = 0; |
| 9099 | if (ix86_function_naked (fn: current_function_decl)) |
| 9100 | { |
| 9101 | if (flag_stack_usage_info) |
| 9102 | current_function_static_stack_size = 0; |
| 9103 | return; |
| 9104 | } |
| 9105 | |
| 9106 | ix86_finalize_stack_frame_flags (); |
| 9107 | |
| 9108 | /* DRAP should not coexist with stack_realign_fp */ |
| 9109 | gcc_assert (!(crtl->drap_reg && stack_realign_fp)); |
| 9110 | |
| 9111 | memset (s: &m->fs, c: 0, n: sizeof (m->fs)); |
| 9112 | |
| 9113 | /* Initialize CFA state for before the prologue. */ |
| 9114 | m->fs.cfa_reg = stack_pointer_rtx; |
| 9115 | m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; |
| 9116 | |
| 9117 | /* Track SP offset to the CFA. We continue tracking this after we've |
| 9118 | swapped the CFA register away from SP. In the case of re-alignment |
| 9119 | this is fudged; we're interested to offsets within the local frame. */ |
| 9120 | m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
| 9121 | m->fs.sp_valid = true; |
| 9122 | m->fs.sp_realigned = false; |
| 9123 | |
| 9124 | const struct ix86_frame &frame = cfun->machine->frame; |
| 9125 | |
| 9126 | if (!TARGET_64BIT && ix86_function_ms_hook_prologue (fn: current_function_decl)) |
| 9127 | { |
| 9128 | /* We should have already generated an error for any use of |
| 9129 | ms_hook on a nested function. */ |
| 9130 | gcc_checking_assert (!ix86_static_chain_on_stack); |
| 9131 | |
| 9132 | /* Check if profiling is active and we shall use profiling before |
| 9133 | prologue variant. If so sorry. */ |
| 9134 | if (crtl->profile && flag_fentry != 0) |
| 9135 | sorry ("%<ms_hook_prologue%> attribute is not compatible " |
| 9136 | "with %<-mfentry%> for 32-bit" ); |
| 9137 | |
| 9138 | /* In ix86_asm_output_function_label we emitted: |
| 9139 | 8b ff movl.s %edi,%edi |
| 9140 | 55 push %ebp |
| 9141 | 8b ec movl.s %esp,%ebp |
| 9142 | |
| 9143 | This matches the hookable function prologue in Win32 API |
| 9144 | functions in Microsoft Windows XP Service Pack 2 and newer. |
| 9145 | Wine uses this to enable Windows apps to hook the Win32 API |
| 9146 | functions provided by Wine. |
| 9147 | |
| 9148 | What that means is that we've already set up the frame pointer. */ |
| 9149 | |
| 9150 | if (frame_pointer_needed |
| 9151 | && !(crtl->drap_reg && crtl->stack_realign_needed)) |
| 9152 | { |
| 9153 | rtx push, mov; |
| 9154 | |
| 9155 | /* We've decided to use the frame pointer already set up. |
| 9156 | Describe this to the unwinder by pretending that both |
| 9157 | push and mov insns happen right here. |
| 9158 | |
| 9159 | Putting the unwind info here at the end of the ms_hook |
| 9160 | is done so that we can make absolutely certain we get |
| 9161 | the required byte sequence at the start of the function, |
| 9162 | rather than relying on an assembler that can produce |
| 9163 | the exact encoding required. |
| 9164 | |
| 9165 | However it does mean (in the unpatched case) that we have |
| 9166 | a 1 insn window where the asynchronous unwind info is |
| 9167 | incorrect. However, if we placed the unwind info at |
| 9168 | its correct location we would have incorrect unwind info |
| 9169 | in the patched case. Which is probably all moot since |
| 9170 | I don't expect Wine generates dwarf2 unwind info for the |
| 9171 | system libraries that use this feature. */ |
| 9172 | |
| 9173 | insn = emit_insn (gen_blockage ()); |
| 9174 | |
| 9175 | push = gen_push (hard_frame_pointer_rtx); |
| 9176 | mov = gen_rtx_SET (hard_frame_pointer_rtx, |
| 9177 | stack_pointer_rtx); |
| 9178 | RTX_FRAME_RELATED_P (push) = 1; |
| 9179 | RTX_FRAME_RELATED_P (mov) = 1; |
| 9180 | |
| 9181 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9182 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
| 9183 | gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); |
| 9184 | |
| 9185 | /* Note that gen_push incremented m->fs.cfa_offset, even |
| 9186 | though we didn't emit the push insn here. */ |
| 9187 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
| 9188 | m->fs.fp_offset = m->fs.cfa_offset; |
| 9189 | m->fs.fp_valid = true; |
| 9190 | } |
| 9191 | else |
| 9192 | { |
| 9193 | /* The frame pointer is not needed so pop %ebp again. |
| 9194 | This leaves us with a pristine state. */ |
| 9195 | emit_insn (gen_pop (hard_frame_pointer_rtx)); |
| 9196 | } |
| 9197 | } |
| 9198 | |
| 9199 | /* The first insn of a function that accepts its static chain on the |
| 9200 | stack is to push the register that would be filled in by a direct |
| 9201 | call. This insn will be skipped by the trampoline. */ |
| 9202 | else if (ix86_static_chain_on_stack) |
| 9203 | { |
| 9204 | static_chain = ix86_static_chain (cfun->decl, false); |
| 9205 | insn = emit_insn (gen_push (arg: static_chain)); |
| 9206 | emit_insn (gen_blockage ()); |
| 9207 | |
| 9208 | /* We don't want to interpret this push insn as a register save, |
| 9209 | only as a stack adjustment. The real copy of the register as |
| 9210 | a save will be done later, if needed. */ |
| 9211 | t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); |
| 9212 | t = gen_rtx_SET (stack_pointer_rtx, t); |
| 9213 | add_reg_note (insn, REG_CFA_ADJUST_CFA, t); |
| 9214 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9215 | } |
| 9216 | |
| 9217 | /* Emit prologue code to adjust stack alignment and setup DRAP, in case |
| 9218 | of DRAP is needed and stack realignment is really needed after reload */ |
| 9219 | if (stack_realign_drap) |
| 9220 | { |
| 9221 | int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
| 9222 | |
| 9223 | /* Can't use DRAP in interrupt function. */ |
| 9224 | if (cfun->machine->func_type != TYPE_NORMAL) |
| 9225 | sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " |
| 9226 | "in interrupt service routine. This may be worked " |
| 9227 | "around by avoiding functions with aggregate return." ); |
| 9228 | |
| 9229 | /* Only need to push parameter pointer reg if it is caller saved. */ |
| 9230 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
| 9231 | { |
| 9232 | /* Push arg pointer reg */ |
| 9233 | insn = emit_insn (gen_push (crtl->drap_reg)); |
| 9234 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9235 | } |
| 9236 | |
| 9237 | /* Grab the argument pointer. */ |
| 9238 | t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); |
| 9239 | insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); |
| 9240 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9241 | m->fs.cfa_reg = crtl->drap_reg; |
| 9242 | m->fs.cfa_offset = 0; |
| 9243 | |
| 9244 | /* Align the stack. */ |
| 9245 | insn = emit_insn (gen_and2_insn (stack_pointer_rtx, |
| 9246 | GEN_INT (-align_bytes))); |
| 9247 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9248 | |
| 9249 | /* Replicate the return address on the stack so that return |
| 9250 | address can be reached via (argp - 1) slot. This is needed |
| 9251 | to implement macro RETURN_ADDR_RTX and intrinsic function |
| 9252 | expand_builtin_return_addr etc. */ |
| 9253 | t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); |
| 9254 | t = gen_frame_mem (word_mode, t); |
| 9255 | insn = emit_insn (gen_push (arg: t)); |
| 9256 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9257 | |
| 9258 | /* For the purposes of frame and register save area addressing, |
| 9259 | we've started over with a new frame. */ |
| 9260 | m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
| 9261 | m->fs.realigned = true; |
| 9262 | |
| 9263 | if (static_chain) |
| 9264 | { |
| 9265 | /* Replicate static chain on the stack so that static chain |
| 9266 | can be reached via (argp - 2) slot. This is needed for |
| 9267 | nested function with stack realignment. */ |
| 9268 | insn = emit_insn (gen_push (arg: static_chain)); |
| 9269 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9270 | } |
| 9271 | } |
| 9272 | |
| 9273 | int_registers_saved = (frame.nregs == 0); |
| 9274 | sse_registers_saved = (frame.nsseregs == 0); |
| 9275 | save_stub_call_needed = (m->call_ms2sysv); |
| 9276 | gcc_assert (sse_registers_saved || !save_stub_call_needed); |
| 9277 | |
| 9278 | if (frame_pointer_needed && !m->fs.fp_valid) |
| 9279 | { |
| 9280 | /* Note: AT&T enter does NOT have reversed args. Enter is probably |
| 9281 | slower on all targets. Also sdb didn't like it. */ |
| 9282 | insn = emit_insn (gen_push (hard_frame_pointer_rtx)); |
| 9283 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9284 | |
| 9285 | if (m->fs.sp_offset == frame.hard_frame_pointer_offset) |
| 9286 | { |
| 9287 | insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); |
| 9288 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9289 | |
| 9290 | if (m->fs.cfa_reg == stack_pointer_rtx) |
| 9291 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
| 9292 | m->fs.fp_offset = m->fs.sp_offset; |
| 9293 | m->fs.fp_valid = true; |
| 9294 | } |
| 9295 | } |
| 9296 | |
| 9297 | if (!int_registers_saved) |
| 9298 | { |
| 9299 | /* If saving registers via PUSH, do so now. */ |
| 9300 | if (!frame.save_regs_using_mov) |
| 9301 | { |
| 9302 | ix86_emit_save_regs (); |
| 9303 | m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return; |
| 9304 | int_registers_saved = true; |
| 9305 | gcc_assert (m->fs.sp_offset == frame.reg_save_offset); |
| 9306 | } |
| 9307 | |
| 9308 | /* When using red zone we may start register saving before allocating |
| 9309 | the stack frame saving one cycle of the prologue. However, avoid |
| 9310 | doing this if we have to probe the stack; at least on x86_64 the |
| 9311 | stack probe can turn into a call that clobbers a red zone location. */ |
| 9312 | else if (ix86_using_red_zone () |
| 9313 | && (! TARGET_STACK_PROBE |
| 9314 | || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) |
| 9315 | { |
| 9316 | HOST_WIDE_INT allocate_offset; |
| 9317 | if (crtl->shrink_wrapped_separate) |
| 9318 | { |
| 9319 | allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset; |
| 9320 | |
| 9321 | /* Adjust the total offset at the beginning of the function. */ |
| 9322 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| 9323 | GEN_INT (allocate_offset), style: -1, |
| 9324 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
| 9325 | m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset; |
| 9326 | } |
| 9327 | |
| 9328 | ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset); |
| 9329 | int_registers_saved = true; |
| 9330 | } |
| 9331 | } |
| 9332 | |
| 9333 | if (frame.red_zone_size != 0) |
| 9334 | cfun->machine->red_zone_used = true; |
| 9335 | |
| 9336 | if (stack_realign_fp) |
| 9337 | { |
| 9338 | int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
| 9339 | gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); |
| 9340 | |
| 9341 | /* Record last valid frame pointer offset. */ |
| 9342 | m->fs.sp_realigned_fp_last = frame.reg_save_offset; |
| 9343 | |
| 9344 | /* The computation of the size of the re-aligned stack frame means |
| 9345 | that we must allocate the size of the register save area before |
| 9346 | performing the actual alignment. Otherwise we cannot guarantee |
| 9347 | that there's enough storage above the realignment point. */ |
| 9348 | allocate = frame.reg_save_offset - m->fs.sp_offset |
| 9349 | + frame.stack_realign_allocate; |
| 9350 | if (allocate) |
| 9351 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| 9352 | GEN_INT (-allocate), style: -1, set_cfa: false); |
| 9353 | |
| 9354 | /* Align the stack. */ |
| 9355 | emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes))); |
| 9356 | m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); |
| 9357 | m->fs.sp_realigned_offset = m->fs.sp_offset |
| 9358 | - frame.stack_realign_allocate; |
| 9359 | /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. |
| 9360 | Beyond this point, stack access should be done via choose_baseaddr or |
| 9361 | by using sp_valid_at and fp_valid_at to determine the correct base |
| 9362 | register. Henceforth, any CFA offset should be thought of as logical |
| 9363 | and not physical. */ |
| 9364 | gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); |
| 9365 | gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); |
| 9366 | m->fs.sp_realigned = true; |
| 9367 | |
| 9368 | /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which |
| 9369 | is needed to describe where a register is saved using a realigned |
| 9370 | stack pointer, so we need to invalidate the stack pointer for that |
| 9371 | target. */ |
| 9372 | if (TARGET_SEH) |
| 9373 | m->fs.sp_valid = false; |
| 9374 | |
| 9375 | /* If SP offset is non-immediate after allocation of the stack frame, |
| 9376 | then emit SSE saves or stub call prior to allocating the rest of the |
| 9377 | stack frame. This is less efficient for the out-of-line stub because |
| 9378 | we can't combine allocations across the call barrier, but it's better |
| 9379 | than using a scratch register. */ |
| 9380 | else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset |
| 9381 | - m->fs.sp_realigned_offset), |
| 9382 | Pmode)) |
| 9383 | { |
| 9384 | if (!sse_registers_saved) |
| 9385 | { |
| 9386 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
| 9387 | sse_registers_saved = true; |
| 9388 | } |
| 9389 | else if (save_stub_call_needed) |
| 9390 | { |
| 9391 | ix86_emit_outlined_ms2sysv_save (frame); |
| 9392 | save_stub_call_needed = false; |
| 9393 | } |
| 9394 | } |
| 9395 | } |
| 9396 | |
| 9397 | allocate = frame.stack_pointer_offset - m->fs.sp_offset; |
| 9398 | |
| 9399 | if (flag_stack_usage_info) |
| 9400 | { |
| 9401 | /* We start to count from ARG_POINTER. */ |
| 9402 | HOST_WIDE_INT stack_size = frame.stack_pointer_offset; |
| 9403 | |
| 9404 | /* If it was realigned, take into account the fake frame. */ |
| 9405 | if (stack_realign_drap) |
| 9406 | { |
| 9407 | if (ix86_static_chain_on_stack) |
| 9408 | stack_size += UNITS_PER_WORD; |
| 9409 | |
| 9410 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
| 9411 | stack_size += UNITS_PER_WORD; |
| 9412 | |
| 9413 | /* This over-estimates by 1 minimal-stack-alignment-unit but |
| 9414 | mitigates that by counting in the new return address slot. */ |
| 9415 | current_function_dynamic_stack_size |
| 9416 | += crtl->stack_alignment_needed / BITS_PER_UNIT; |
| 9417 | } |
| 9418 | |
| 9419 | current_function_static_stack_size = stack_size; |
| 9420 | } |
| 9421 | |
| 9422 | /* On SEH target with very large frame size, allocate an area to save |
| 9423 | SSE registers (as the very large allocation won't be described). */ |
| 9424 | if (TARGET_SEH |
| 9425 | && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE |
| 9426 | && !sse_registers_saved) |
| 9427 | { |
| 9428 | HOST_WIDE_INT sse_size |
| 9429 | = frame.sse_reg_save_offset - frame.reg_save_offset; |
| 9430 | |
| 9431 | gcc_assert (int_registers_saved); |
| 9432 | |
| 9433 | /* No need to do stack checking as the area will be immediately |
| 9434 | written. */ |
| 9435 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| 9436 | GEN_INT (-sse_size), style: -1, |
| 9437 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
| 9438 | allocate -= sse_size; |
| 9439 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
| 9440 | sse_registers_saved = true; |
| 9441 | } |
| 9442 | |
| 9443 | /* If stack clash protection is requested, then probe the stack, unless it |
| 9444 | is already probed on the target. */ |
| 9445 | if (allocate >= 0 |
| 9446 | && flag_stack_clash_protection |
| 9447 | && !ix86_target_stack_probe ()) |
| 9448 | { |
| 9449 | ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: false); |
| 9450 | allocate = 0; |
| 9451 | } |
| 9452 | |
| 9453 | /* The stack has already been decremented by the instruction calling us |
| 9454 | so probe if the size is non-negative to preserve the protection area. */ |
| 9455 | else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
| 9456 | { |
| 9457 | const HOST_WIDE_INT probe_interval = get_probe_interval (); |
| 9458 | |
| 9459 | if (STACK_CHECK_MOVING_SP) |
| 9460 | { |
| 9461 | if (crtl->is_leaf |
| 9462 | && !cfun->calls_alloca |
| 9463 | && allocate <= probe_interval) |
| 9464 | ; |
| 9465 | |
| 9466 | else |
| 9467 | { |
| 9468 | ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: true); |
| 9469 | allocate = 0; |
| 9470 | } |
| 9471 | } |
| 9472 | |
| 9473 | else |
| 9474 | { |
| 9475 | HOST_WIDE_INT size = allocate; |
| 9476 | |
| 9477 | if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) |
| 9478 | size = 0x80000000 - get_stack_check_protect () - 1; |
| 9479 | |
| 9480 | if (TARGET_STACK_PROBE) |
| 9481 | { |
| 9482 | if (crtl->is_leaf && !cfun->calls_alloca) |
| 9483 | { |
| 9484 | if (size > probe_interval) |
| 9485 | ix86_emit_probe_stack_range (first: 0, size, int_registers_saved); |
| 9486 | } |
| 9487 | else |
| 9488 | ix86_emit_probe_stack_range (first: 0, |
| 9489 | size: size + get_stack_check_protect (), |
| 9490 | int_registers_saved); |
| 9491 | } |
| 9492 | else |
| 9493 | { |
| 9494 | if (crtl->is_leaf && !cfun->calls_alloca) |
| 9495 | { |
| 9496 | if (size > probe_interval |
| 9497 | && size > get_stack_check_protect ()) |
| 9498 | ix86_emit_probe_stack_range (first: get_stack_check_protect (), |
| 9499 | size: (size |
| 9500 | - get_stack_check_protect ()), |
| 9501 | int_registers_saved); |
| 9502 | } |
| 9503 | else |
| 9504 | ix86_emit_probe_stack_range (first: get_stack_check_protect (), size, |
| 9505 | int_registers_saved); |
| 9506 | } |
| 9507 | } |
| 9508 | } |
| 9509 | |
| 9510 | if (allocate == 0) |
| 9511 | ; |
| 9512 | else if (!ix86_target_stack_probe () |
| 9513 | || frame.stack_pointer_offset < CHECK_STACK_LIMIT) |
| 9514 | { |
| 9515 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| 9516 | GEN_INT (-allocate), style: -1, |
| 9517 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
| 9518 | } |
| 9519 | else |
| 9520 | { |
| 9521 | rtx eax = gen_rtx_REG (Pmode, AX_REG); |
| 9522 | rtx r10 = NULL; |
| 9523 | const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); |
| 9524 | bool eax_live = ix86_eax_live_at_start_p (); |
| 9525 | bool r10_live = false; |
| 9526 | |
| 9527 | if (TARGET_64BIT) |
| 9528 | r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); |
| 9529 | |
| 9530 | if (eax_live) |
| 9531 | { |
| 9532 | insn = emit_insn (gen_push (arg: eax)); |
| 9533 | allocate -= UNITS_PER_WORD; |
| 9534 | /* Note that SEH directives need to continue tracking the stack |
| 9535 | pointer even after the frame pointer has been set up. */ |
| 9536 | if (sp_is_cfa_reg || TARGET_SEH) |
| 9537 | { |
| 9538 | if (sp_is_cfa_reg) |
| 9539 | m->fs.cfa_offset += UNITS_PER_WORD; |
| 9540 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9541 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
| 9542 | gen_rtx_SET (stack_pointer_rtx, |
| 9543 | plus_constant (Pmode, |
| 9544 | stack_pointer_rtx, |
| 9545 | -UNITS_PER_WORD))); |
| 9546 | } |
| 9547 | } |
| 9548 | |
| 9549 | if (r10_live) |
| 9550 | { |
| 9551 | r10 = gen_rtx_REG (Pmode, R10_REG); |
| 9552 | insn = emit_insn (gen_push (arg: r10)); |
| 9553 | allocate -= UNITS_PER_WORD; |
| 9554 | if (sp_is_cfa_reg || TARGET_SEH) |
| 9555 | { |
| 9556 | if (sp_is_cfa_reg) |
| 9557 | m->fs.cfa_offset += UNITS_PER_WORD; |
| 9558 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9559 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
| 9560 | gen_rtx_SET (stack_pointer_rtx, |
| 9561 | plus_constant (Pmode, |
| 9562 | stack_pointer_rtx, |
| 9563 | -UNITS_PER_WORD))); |
| 9564 | } |
| 9565 | } |
| 9566 | |
| 9567 | emit_move_insn (eax, GEN_INT (allocate)); |
| 9568 | emit_insn (gen_allocate_stack_worker_probe (Pmode, x0: eax, x1: eax)); |
| 9569 | |
| 9570 | /* Use the fact that AX still contains ALLOCATE. */ |
| 9571 | insn = emit_insn (gen_pro_epilogue_adjust_stack_sub |
| 9572 | (Pmode, stack_pointer_rtx, stack_pointer_rtx, x2: eax)); |
| 9573 | |
| 9574 | if (sp_is_cfa_reg || TARGET_SEH) |
| 9575 | { |
| 9576 | if (sp_is_cfa_reg) |
| 9577 | m->fs.cfa_offset += allocate; |
| 9578 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9579 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
| 9580 | gen_rtx_SET (stack_pointer_rtx, |
| 9581 | plus_constant (Pmode, stack_pointer_rtx, |
| 9582 | -allocate))); |
| 9583 | } |
| 9584 | m->fs.sp_offset += allocate; |
| 9585 | |
| 9586 | /* Use stack_pointer_rtx for relative addressing so that code works for |
| 9587 | realigned stack. But this means that we need a blockage to prevent |
| 9588 | stores based on the frame pointer from being scheduled before. */ |
| 9589 | if (r10_live && eax_live) |
| 9590 | { |
| 9591 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
| 9592 | emit_move_insn (gen_rtx_REG (word_mode, R10_REG), |
| 9593 | gen_frame_mem (word_mode, t)); |
| 9594 | t = plus_constant (Pmode, t, UNITS_PER_WORD); |
| 9595 | emit_move_insn (gen_rtx_REG (word_mode, AX_REG), |
| 9596 | gen_frame_mem (word_mode, t)); |
| 9597 | emit_insn (gen_memory_blockage ()); |
| 9598 | } |
| 9599 | else if (eax_live || r10_live) |
| 9600 | { |
| 9601 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
| 9602 | emit_move_insn (gen_rtx_REG (word_mode, |
| 9603 | (eax_live ? AX_REG : R10_REG)), |
| 9604 | gen_frame_mem (word_mode, t)); |
| 9605 | emit_insn (gen_memory_blockage ()); |
| 9606 | } |
| 9607 | } |
| 9608 | gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); |
| 9609 | |
  /* If we haven't already set up the frame pointer, do so now.  */
| 9611 | if (frame_pointer_needed && !m->fs.fp_valid) |
| 9612 | { |
| 9613 | insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, |
| 9614 | GEN_INT (frame.stack_pointer_offset |
| 9615 | - frame.hard_frame_pointer_offset)); |
| 9616 | insn = emit_insn (insn); |
| 9617 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9618 | add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); |
| 9619 | |
| 9620 | if (m->fs.cfa_reg == stack_pointer_rtx) |
| 9621 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
| 9622 | m->fs.fp_offset = frame.hard_frame_pointer_offset; |
| 9623 | m->fs.fp_valid = true; |
| 9624 | } |
| 9625 | |
| 9626 | if (!int_registers_saved) |
| 9627 | ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset); |
| 9628 | if (!sse_registers_saved) |
| 9629 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
| 9630 | else if (save_stub_call_needed) |
| 9631 | ix86_emit_outlined_ms2sysv_save (frame); |
| 9632 | |
| 9633 | /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT |
| 9634 | in PROLOGUE. */ |
| 9635 | if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) |
| 9636 | { |
| 9637 | rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); |
| 9638 | insn = emit_insn (gen_set_got (pic)); |
| 9639 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9640 | add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); |
| 9641 | emit_insn (gen_prologue_use (pic)); |
      /* Delete an already-emitted SET_GOT, if one exists and was
	 allocated to REAL_PIC_OFFSET_TABLE_REGNUM.  */
| 9644 | ix86_elim_entry_set_got (reg: pic); |
| 9645 | } |
| 9646 | |
| 9647 | if (crtl->drap_reg && !crtl->stack_realign_needed) |
| 9648 | { |
| 9649 | /* vDRAP is setup but after reload it turns out stack realign |
| 9650 | isn't necessary, here we will emit prologue to setup DRAP |
| 9651 | without stack realign adjustment */ |
| 9652 | t = choose_baseaddr (cfa_offset: 0, NULL); |
| 9653 | emit_insn (gen_rtx_SET (crtl->drap_reg, t)); |
| 9654 | } |
| 9655 | |
| 9656 | /* Prevent instructions from being scheduled into register save push |
| 9657 | sequence when access to the redzone area is done through frame pointer. |
| 9658 | The offset between the frame pointer and the stack pointer is calculated |
| 9659 | relative to the value of the stack pointer at the end of the function |
| 9660 | prologue, and moving instructions that access redzone area via frame |
| 9661 | pointer inside push sequence violates this assumption. */ |
| 9662 | if (frame_pointer_needed && frame.red_zone_size) |
| 9663 | emit_insn (gen_memory_blockage ()); |
| 9664 | |
| 9665 | /* SEH requires that the prologue end within 256 bytes of the start of |
| 9666 | the function. Prevent instruction schedules that would extend that. |
| 9667 | Further, prevent alloca modifications to the stack pointer from being |
| 9668 | combined with prologue modifications. */ |
| 9669 | if (TARGET_SEH) |
| 9670 | emit_insn (gen_prologue_use (stack_pointer_rtx)); |
| 9671 | } |
| 9672 | |
/* Emit code to restore REG using a POP (or, when PPX_P, a POPP) insn,
   and keep the frame-state and CFI bookkeeping in sync.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (arg: reg, ppx_p));

  /* The popped value came from the save slot at the current SP offset;
     the pop then shrinks the tracked frame by one word.  */
  ix86_add_cfa_restore_note (insn, reg, cfa_offset: m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* While the CFA is SP-relative, the pop changes the CFA-to-SP
	 distance, so attach a REG_CFA_ADJUST_CFA note describing the
	 one-word adjustment for the unwinder.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
| 9730 | |
/* Emit code to restore REG1 and REG2 using a single POP2 (or, when
   PPX_P, POP2P) insn, and keep the frame-state and CFI bookkeeping in
   sync.  The caller (ix86_emit_restore_regs_using_pop2) only uses this
   when the stack pointer is 16-byte aligned.  */
static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  /* POP2 pops two words at once.  */
  const int offset = UNITS_PER_WORD * 2;
  rtx_insn *insn;

  /* The pop is modelled as a TImode load through a post-increment of
     the stack pointer.  */
  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));

  if (ppx_p)
    insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
  else
    insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  /* Attach restore notes for both registers to the single insn.  */
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* While the CFA is SP-relative, describe the two-word adjustment
	 for the unwinder.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
| 9803 | |
| 9804 | /* Emit code to restore saved registers using POP insns. */ |
| 9805 | |
| 9806 | static void |
| 9807 | ix86_emit_restore_regs_using_pop (bool ppx_p) |
| 9808 | { |
| 9809 | unsigned int regno; |
| 9810 | |
| 9811 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 9812 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true)) |
| 9813 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno), ppx_p); |
| 9814 | } |
| 9815 | |
| 9816 | /* Emit code to restore saved registers using POP2 insns. */ |
| 9817 | |
| 9818 | static void |
| 9819 | ix86_emit_restore_regs_using_pop2 (void) |
| 9820 | { |
| 9821 | int regno; |
| 9822 | int regno_list[2]; |
| 9823 | regno_list[0] = regno_list[1] = -1; |
| 9824 | int loaded_regnum = 0; |
| 9825 | bool aligned = cfun->machine->fs.sp_offset % 16 == 0; |
| 9826 | |
| 9827 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 9828 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true)) |
| 9829 | { |
| 9830 | if (aligned) |
| 9831 | { |
| 9832 | regno_list[loaded_regnum++] = regno; |
| 9833 | if (loaded_regnum == 2) |
| 9834 | { |
| 9835 | gcc_assert (regno_list[0] != -1 |
| 9836 | && regno_list[1] != -1 |
| 9837 | && regno_list[0] != regno_list[1]); |
| 9838 | |
| 9839 | ix86_emit_restore_reg_using_pop2 (reg1: gen_rtx_REG (word_mode, |
| 9840 | regno_list[0]), |
| 9841 | reg2: gen_rtx_REG (word_mode, |
| 9842 | regno_list[1]), |
| 9843 | TARGET_APX_PPX); |
| 9844 | loaded_regnum = 0; |
| 9845 | regno_list[0] = regno_list[1] = -1; |
| 9846 | } |
| 9847 | } |
| 9848 | else |
| 9849 | { |
| 9850 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno), |
| 9851 | TARGET_APX_PPX); |
| 9852 | aligned = true; |
| 9853 | } |
| 9854 | } |
| 9855 | |
| 9856 | if (loaded_regnum == 1) |
| 9857 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno_list[0]), |
| 9858 | TARGET_APX_PPX); |
| 9859 | } |
| 9860 | |
| 9861 | /* Emit code and notes for the LEAVE instruction. If insn is non-null, |
| 9862 | omits the emit and only attaches the notes. */ |
| 9863 | |
| 9864 | static void |
| 9865 | ix86_emit_leave (rtx_insn *insn) |
| 9866 | { |
| 9867 | struct machine_function *m = cfun->machine; |
| 9868 | |
| 9869 | if (!insn) |
| 9870 | insn = emit_insn (gen_leave (arg0: word_mode)); |
| 9871 | |
| 9872 | ix86_add_queued_cfa_restore_notes (insn); |
| 9873 | |
| 9874 | gcc_assert (m->fs.fp_valid); |
| 9875 | m->fs.sp_valid = true; |
| 9876 | m->fs.sp_realigned = false; |
| 9877 | m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; |
| 9878 | m->fs.fp_valid = false; |
| 9879 | |
| 9880 | if (m->fs.cfa_reg == hard_frame_pointer_rtx) |
| 9881 | { |
| 9882 | m->fs.cfa_reg = stack_pointer_rtx; |
| 9883 | m->fs.cfa_offset = m->fs.sp_offset; |
| 9884 | |
| 9885 | add_reg_note (insn, REG_CFA_DEF_CFA, |
| 9886 | plus_constant (Pmode, stack_pointer_rtx, |
| 9887 | m->fs.sp_offset)); |
| 9888 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9889 | } |
| 9890 | ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, |
| 9891 | cfa_offset: m->fs.fp_offset); |
| 9892 | } |
| 9893 | |
| 9894 | /* Emit code to restore saved registers using MOV insns. |
| 9895 | First register is restored from CFA - CFA_OFFSET. */ |
| 9896 | static void |
| 9897 | ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, |
| 9898 | bool maybe_eh_return) |
| 9899 | { |
| 9900 | struct machine_function *m = cfun->machine; |
| 9901 | unsigned int regno; |
| 9902 | |
| 9903 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 9904 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true)) |
| 9905 | { |
| 9906 | |
| 9907 | /* Skip registers, already processed by shrink wrap separate. */ |
| 9908 | if (!cfun->machine->reg_is_wrapped_separately[regno]) |
| 9909 | { |
| 9910 | rtx reg = gen_rtx_REG (word_mode, regno); |
| 9911 | rtx mem; |
| 9912 | rtx_insn *insn; |
| 9913 | |
| 9914 | mem = choose_baseaddr (cfa_offset, NULL); |
| 9915 | mem = gen_frame_mem (word_mode, mem); |
| 9916 | insn = emit_move_insn (reg, mem); |
| 9917 | |
| 9918 | if (m->fs.cfa_reg == crtl->drap_reg |
| 9919 | && regno == REGNO (crtl->drap_reg)) |
| 9920 | { |
| 9921 | /* Previously we'd represented the CFA as an expression |
| 9922 | like *(%ebp - 8). We've just popped that value from |
| 9923 | the stack, which means we need to reset the CFA to |
| 9924 | the drap register. This will remain until we restore |
| 9925 | the stack pointer. */ |
| 9926 | add_reg_note (insn, REG_CFA_DEF_CFA, reg); |
| 9927 | RTX_FRAME_RELATED_P (insn) = 1; |
| 9928 | |
| 9929 | /* DRAP register is valid for addressing. */ |
| 9930 | m->fs.drap_valid = true; |
| 9931 | } |
| 9932 | else |
| 9933 | ix86_add_cfa_restore_note (NULL, reg, cfa_offset); |
| 9934 | } |
| 9935 | cfa_offset -= UNITS_PER_WORD; |
| 9936 | } |
| 9937 | } |
| 9938 | |
| 9939 | /* Emit code to restore saved registers using MOV insns. |
| 9940 | First register is restored from CFA - CFA_OFFSET. */ |
| 9941 | static void |
| 9942 | ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, |
| 9943 | bool maybe_eh_return) |
| 9944 | { |
| 9945 | unsigned int regno; |
| 9946 | |
| 9947 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 9948 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true)) |
| 9949 | { |
| 9950 | rtx reg = gen_rtx_REG (V4SFmode, regno); |
| 9951 | rtx mem; |
| 9952 | unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); |
| 9953 | |
| 9954 | mem = choose_baseaddr (cfa_offset, align: &align); |
| 9955 | mem = gen_rtx_MEM (V4SFmode, mem); |
| 9956 | |
| 9957 | /* The location aligment depends upon the base register. */ |
| 9958 | align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); |
| 9959 | gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); |
| 9960 | set_mem_align (mem, align); |
| 9961 | emit_insn (gen_rtx_SET (reg, mem)); |
| 9962 | |
| 9963 | ix86_add_cfa_restore_note (NULL, reg, cfa_offset); |
| 9964 | |
| 9965 | cfa_offset -= GET_MODE_SIZE (V4SFmode); |
| 9966 | } |
| 9967 | } |
| 9968 | |
/* Emit the out-of-line ms2sysv register-restore sequence: either a call
   to the restore stub (USE_CALL) followed by a stack adjustment, or a
   tail jump through the stub that also performs the function return.
   FRAME describes the current frame layout; STYLE is forwarded to
   pro_epilogue_adjust_stack.  */
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  /* Number of registers the stub restores.  */
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (cfa_offset: rsi_offset, align: &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  Four variants exist, selected by
     whether a hard frame pointer is in use and whether the stub is
     called or tail-jumped to.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  /* Size the PARALLEL: one frame load per register, plus the USE of the
     stub symbol, and for the tail-call form the return and the SP
     restore elements.  */
  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  /* With a hard frame pointer, the stub restores SP and RBP
	     from the frame (a LEAVE-like effect).  */
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  tmp = plus_constant (DImode, rbp, 8);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, frame_reg: rsi, offset: r.offset);

      /* Save RSI frame load insn & note to add last.  RSI is the stub's
	 base pointer, so it must be reloaded after all other slots have
	 been addressed through it.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset: r.offset);
	}
    }

  /* Add RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, reg: gen_rtx_REG (DImode, SI_REG),
			     cfa_offset: rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
	/* The HFP tail stub behaves like LEAVE; attach the LEAVE notes
	   and frame-state updates to the jump.  */
	ix86_emit_leave (insn);
      else
	{
	  /* Need CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (dealloc), style,
				 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
    }
}
| 10127 | |
| 10128 | /* Restore function stack, frame, and registers. */ |
| 10129 | |
| 10130 | void |
| 10131 | ix86_expand_epilogue (int style) |
| 10132 | { |
| 10133 | struct machine_function *m = cfun->machine; |
| 10134 | struct machine_frame_state frame_state_save = m->fs; |
| 10135 | bool restore_regs_via_mov; |
| 10136 | bool using_drap; |
| 10137 | bool restore_stub_is_tail = false; |
| 10138 | |
| 10139 | if (ix86_function_naked (fn: current_function_decl)) |
| 10140 | { |
| 10141 | /* The program should not reach this point. */ |
| 10142 | emit_insn (gen_ud2 ()); |
| 10143 | return; |
| 10144 | } |
| 10145 | |
| 10146 | ix86_finalize_stack_frame_flags (); |
| 10147 | const struct ix86_frame &frame = cfun->machine->frame; |
| 10148 | |
| 10149 | m->fs.sp_realigned = stack_realign_fp; |
| 10150 | m->fs.sp_valid = stack_realign_fp |
| 10151 | || !frame_pointer_needed |
| 10152 | || crtl->sp_is_unchanging; |
| 10153 | gcc_assert (!m->fs.sp_valid |
| 10154 | || m->fs.sp_offset == frame.stack_pointer_offset); |
| 10155 | |
| 10156 | /* The FP must be valid if the frame pointer is present. */ |
| 10157 | gcc_assert (frame_pointer_needed == m->fs.fp_valid); |
| 10158 | gcc_assert (!m->fs.fp_valid |
| 10159 | || m->fs.fp_offset == frame.hard_frame_pointer_offset); |
| 10160 | |
| 10161 | /* We must have *some* valid pointer to the stack frame. */ |
| 10162 | gcc_assert (m->fs.sp_valid || m->fs.fp_valid); |
| 10163 | |
| 10164 | /* The DRAP is never valid at this point. */ |
| 10165 | gcc_assert (!m->fs.drap_valid); |
| 10166 | |
| 10167 | /* See the comment about red zone and frame |
| 10168 | pointer usage in ix86_expand_prologue. */ |
| 10169 | if (frame_pointer_needed && frame.red_zone_size) |
| 10170 | emit_insn (gen_memory_blockage ()); |
| 10171 | |
| 10172 | using_drap = crtl->drap_reg && crtl->stack_realign_needed; |
| 10173 | gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); |
| 10174 | |
| 10175 | /* Determine the CFA offset of the end of the red-zone. */ |
| 10176 | m->fs.red_zone_offset = 0; |
| 10177 | if (ix86_using_red_zone () && crtl->args.pops_args < 65536) |
| 10178 | { |
| 10179 | /* The red-zone begins below return address and error code in |
| 10180 | exception handler. */ |
| 10181 | m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET; |
| 10182 | |
| 10183 | /* When the register save area is in the aligned portion of |
| 10184 | the stack, determine the maximum runtime displacement that |
| 10185 | matches up with the aligned frame. */ |
| 10186 | if (stack_realign_drap) |
| 10187 | m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT |
| 10188 | + UNITS_PER_WORD); |
| 10189 | } |
| 10190 | |
| 10191 | HOST_WIDE_INT reg_save_offset = frame.reg_save_offset; |
| 10192 | |
| 10193 | /* Special care must be taken for the normal return case of a function |
| 10194 | using eh_return: the eax and edx registers are marked as saved, but |
| 10195 | not restored along this path. Adjust the save location to match. */ |
| 10196 | if (crtl->calls_eh_return && style != 2) |
| 10197 | reg_save_offset -= 2 * UNITS_PER_WORD; |
| 10198 | |
| 10199 | /* EH_RETURN requires the use of moves to function properly. */ |
| 10200 | if (crtl->calls_eh_return) |
| 10201 | restore_regs_via_mov = true; |
| 10202 | /* SEH requires the use of pops to identify the epilogue. */ |
| 10203 | else if (TARGET_SEH) |
| 10204 | restore_regs_via_mov = false; |
| 10205 | /* If we already save reg with pushp, don't use move at epilogue. */ |
| 10206 | else if (m->fs.apx_ppx_used) |
| 10207 | restore_regs_via_mov = false; |
| 10208 | /* If we're only restoring one register and sp cannot be used then |
| 10209 | using a move instruction to restore the register since it's |
| 10210 | less work than reloading sp and popping the register. */ |
| 10211 | else if (!sp_valid_at (cfa_offset: frame.hfp_save_offset) && frame.nregs <= 1) |
| 10212 | restore_regs_via_mov = true; |
| 10213 | else if (crtl->shrink_wrapped_separate |
| 10214 | || (TARGET_EPILOGUE_USING_MOVE |
| 10215 | && cfun->machine->use_fast_prologue_epilogue |
| 10216 | && (frame.nregs > 1 |
| 10217 | || m->fs.sp_offset != reg_save_offset))) |
| 10218 | restore_regs_via_mov = true; |
| 10219 | else if (frame_pointer_needed |
| 10220 | && !frame.nregs |
| 10221 | && m->fs.sp_offset != reg_save_offset) |
| 10222 | restore_regs_via_mov = true; |
| 10223 | else if (frame_pointer_needed |
| 10224 | && TARGET_USE_LEAVE |
| 10225 | && cfun->machine->use_fast_prologue_epilogue |
| 10226 | && frame.nregs == 1) |
| 10227 | restore_regs_via_mov = true; |
| 10228 | else |
| 10229 | restore_regs_via_mov = false; |
| 10230 | |
| 10231 | if (crtl->shrink_wrapped_separate) |
| 10232 | gcc_assert (restore_regs_via_mov); |
| 10233 | |
| 10234 | if (restore_regs_via_mov || frame.nsseregs) |
| 10235 | { |
| 10236 | /* Ensure that the entire register save area is addressable via |
| 10237 | the stack pointer, if we will restore SSE regs via sp. */ |
| 10238 | if (TARGET_64BIT |
| 10239 | && m->fs.sp_offset > 0x7fffffff |
| 10240 | && sp_valid_at (cfa_offset: frame.stack_realign_offset + 1) |
| 10241 | && (frame.nsseregs + frame.nregs) != 0) |
| 10242 | { |
| 10243 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| 10244 | GEN_INT (m->fs.sp_offset |
| 10245 | - frame.sse_reg_save_offset), |
| 10246 | style, |
| 10247 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
| 10248 | } |
| 10249 | } |
| 10250 | |
| 10251 | /* If there are any SSE registers to restore, then we have to do it |
| 10252 | via moves, since there's obviously no pop for SSE regs. */ |
| 10253 | if (frame.nsseregs) |
| 10254 | ix86_emit_restore_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset, |
| 10255 | maybe_eh_return: style == 2); |
| 10256 | |
| 10257 | if (m->call_ms2sysv) |
| 10258 | { |
| 10259 | int pop_incoming_args = crtl->args.pops_args && crtl->args.size; |
| 10260 | |
| 10261 | /* We cannot use a tail-call for the stub if: |
| 10262 | 1. We have to pop incoming args, |
| 10263 | 2. We have additional int regs to restore, or |
| 10264 | 3. A sibling call will be the tail-call, or |
| 10265 | 4. We are emitting an eh_return_internal epilogue. |
| 10266 | |
| 10267 | TODO: Item 4 has not yet tested! |
| 10268 | |
| 10269 | If any of the above are true, we will call the stub rather than |
| 10270 | jump to it. */ |
| 10271 | restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1); |
| 10272 | ix86_emit_outlined_ms2sysv_restore (frame, use_call: !restore_stub_is_tail, style); |
| 10273 | } |
| 10274 | |
| 10275 | /* If using out-of-line stub that is a tail-call, then...*/ |
| 10276 | if (m->call_ms2sysv && restore_stub_is_tail) |
| 10277 | { |
| 10278 | /* TODO: parinoid tests. (remove eventually) */ |
| 10279 | gcc_assert (m->fs.sp_valid); |
| 10280 | gcc_assert (!m->fs.sp_realigned); |
| 10281 | gcc_assert (!m->fs.fp_valid); |
| 10282 | gcc_assert (!m->fs.realigned); |
| 10283 | gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); |
| 10284 | gcc_assert (!crtl->drap_reg); |
| 10285 | gcc_assert (!frame.nregs); |
| 10286 | gcc_assert (!crtl->shrink_wrapped_separate); |
| 10287 | } |
| 10288 | else if (restore_regs_via_mov) |
| 10289 | { |
| 10290 | rtx t; |
| 10291 | |
| 10292 | if (frame.nregs) |
| 10293 | ix86_emit_restore_regs_using_mov (cfa_offset: reg_save_offset, maybe_eh_return: style == 2); |
| 10294 | |
| 10295 | /* eh_return epilogues need %ecx added to the stack pointer. */ |
| 10296 | if (style == 2) |
| 10297 | { |
| 10298 | rtx sa = EH_RETURN_STACKADJ_RTX; |
| 10299 | rtx_insn *insn; |
| 10300 | |
| 10301 | gcc_assert (!crtl->shrink_wrapped_separate); |
| 10302 | |
| 10303 | /* Stack realignment doesn't work with eh_return. */ |
| 10304 | if (crtl->stack_realign_needed) |
| 10305 | sorry ("Stack realignment not supported with " |
| 10306 | "%<__builtin_eh_return%>" ); |
| 10307 | |
| 10308 | /* regparm nested functions don't work with eh_return. */ |
| 10309 | if (ix86_static_chain_on_stack) |
| 10310 | sorry ("regparm nested function not supported with " |
| 10311 | "%<__builtin_eh_return%>" ); |
| 10312 | |
| 10313 | if (frame_pointer_needed) |
| 10314 | { |
| 10315 | t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); |
| 10316 | t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); |
| 10317 | emit_insn (gen_rtx_SET (sa, t)); |
| 10318 | |
| 10319 | /* NB: eh_return epilogues must restore the frame pointer |
| 10320 | in word_mode since the upper 32 bits of RBP register |
| 10321 | can have any values. */ |
| 10322 | t = gen_frame_mem (word_mode, hard_frame_pointer_rtx); |
| 10323 | rtx frame_reg = gen_rtx_REG (word_mode, |
| 10324 | HARD_FRAME_POINTER_REGNUM); |
| 10325 | insn = emit_move_insn (frame_reg, t); |
| 10326 | |
| 10327 | /* Note that we use SA as a temporary CFA, as the return |
| 10328 | address is at the proper place relative to it. We |
| 10329 | pretend this happens at the FP restore insn because |
| 10330 | prior to this insn the FP would be stored at the wrong |
| 10331 | offset relative to SA, and after this insn we have no |
| 10332 | other reasonable register to use for the CFA. We don't |
| 10333 | bother resetting the CFA to the SP for the duration of |
| 10334 | the return insn, unless the control flow instrumentation |
| 10335 | is done. In this case the SP is used later and we have |
| 10336 | to reset CFA to SP. */ |
| 10337 | add_reg_note (insn, REG_CFA_DEF_CFA, |
| 10338 | plus_constant (Pmode, sa, UNITS_PER_WORD)); |
| 10339 | ix86_add_queued_cfa_restore_notes (insn); |
| 10340 | add_reg_note (insn, REG_CFA_RESTORE, frame_reg); |
| 10341 | RTX_FRAME_RELATED_P (insn) = 1; |
| 10342 | |
| 10343 | m->fs.cfa_reg = sa; |
| 10344 | m->fs.cfa_offset = UNITS_PER_WORD; |
| 10345 | m->fs.fp_valid = false; |
| 10346 | |
| 10347 | pro_epilogue_adjust_stack (stack_pointer_rtx, src: sa, |
| 10348 | const0_rtx, style, |
| 10349 | flag_cf_protection); |
| 10350 | } |
| 10351 | else |
| 10352 | { |
| 10353 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); |
| 10354 | t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); |
| 10355 | insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); |
| 10356 | ix86_add_queued_cfa_restore_notes (insn); |
| 10357 | |
| 10358 | gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); |
| 10359 | if (m->fs.cfa_offset != UNITS_PER_WORD) |
| 10360 | { |
| 10361 | m->fs.cfa_offset = UNITS_PER_WORD; |
| 10362 | add_reg_note (insn, REG_CFA_DEF_CFA, |
| 10363 | plus_constant (Pmode, stack_pointer_rtx, |
| 10364 | UNITS_PER_WORD)); |
| 10365 | RTX_FRAME_RELATED_P (insn) = 1; |
| 10366 | } |
| 10367 | } |
| 10368 | m->fs.sp_offset = UNITS_PER_WORD; |
| 10369 | m->fs.sp_valid = true; |
| 10370 | m->fs.sp_realigned = false; |
| 10371 | } |
| 10372 | } |
| 10373 | else |
| 10374 | { |
| 10375 | /* SEH requires that the function end with (1) a stack adjustment |
| 10376 | if necessary, (2) a sequence of pops, and (3) a return or |
| 10377 | jump instruction. Prevent insns from the function body from |
| 10378 | being scheduled into this sequence. */ |
| 10379 | if (TARGET_SEH) |
| 10380 | { |
| 10381 | /* Prevent a catch region from being adjacent to the standard |
| 10382 | epilogue sequence. Unfortunately neither crtl->uses_eh_lsda |
| 10383 | nor several other flags that would be interesting to test are |
| 10384 | set up yet. */ |
| 10385 | if (flag_non_call_exceptions) |
| 10386 | emit_insn (gen_nops (const1_rtx)); |
| 10387 | else |
| 10388 | emit_insn (gen_blockage ()); |
| 10389 | } |
| 10390 | |
| 10391 | /* First step is to deallocate the stack frame so that we can |
| 10392 | pop the registers. If the stack pointer was realigned, it needs |
| 10393 | to be restored now. Also do it on SEH target for very large |
| 10394 | frame as the emitted instructions aren't allowed by the ABI |
| 10395 | in epilogues. */ |
| 10396 | if (!m->fs.sp_valid || m->fs.sp_realigned |
| 10397 | || (TARGET_SEH |
| 10398 | && (m->fs.sp_offset - reg_save_offset |
| 10399 | >= SEH_MAX_FRAME_SIZE))) |
| 10400 | { |
| 10401 | pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, |
| 10402 | GEN_INT (m->fs.fp_offset |
| 10403 | - reg_save_offset), |
| 10404 | style, set_cfa: false); |
| 10405 | } |
| 10406 | else if (m->fs.sp_offset != reg_save_offset) |
| 10407 | { |
| 10408 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| 10409 | GEN_INT (m->fs.sp_offset |
| 10410 | - reg_save_offset), |
| 10411 | style, |
| 10412 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
| 10413 | } |
| 10414 | |
| 10415 | if (TARGET_APX_PUSH2POP2 |
| 10416 | && ix86_can_use_push2pop2 () |
| 10417 | && m->func_type == TYPE_NORMAL) |
| 10418 | ix86_emit_restore_regs_using_pop2 (); |
| 10419 | else |
| 10420 | ix86_emit_restore_regs_using_pop (TARGET_APX_PPX); |
| 10421 | } |
| 10422 | |
| 10423 | /* If we used a stack pointer and haven't already got rid of it, |
| 10424 | then do so now. */ |
| 10425 | if (m->fs.fp_valid) |
| 10426 | { |
| 10427 | /* If the stack pointer is valid and pointing at the frame |
| 10428 | pointer store address, then we only need a pop. */ |
| 10429 | if (sp_valid_at (cfa_offset: frame.hfp_save_offset) |
| 10430 | && m->fs.sp_offset == frame.hfp_save_offset) |
| 10431 | ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
| 10432 | /* Leave results in shorter dependency chains on CPUs that are |
| 10433 | able to grok it fast. */ |
| 10434 | else if (TARGET_USE_LEAVE |
| 10435 | || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) |
| 10436 | || !cfun->machine->use_fast_prologue_epilogue) |
| 10437 | ix86_emit_leave (NULL); |
| 10438 | else |
| 10439 | { |
| 10440 | pro_epilogue_adjust_stack (stack_pointer_rtx, |
| 10441 | hard_frame_pointer_rtx, |
| 10442 | const0_rtx, style, set_cfa: !using_drap); |
| 10443 | ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
| 10444 | } |
| 10445 | } |
| 10446 | |
| 10447 | if (using_drap) |
| 10448 | { |
| 10449 | int param_ptr_offset = UNITS_PER_WORD; |
| 10450 | rtx_insn *insn; |
| 10451 | |
| 10452 | gcc_assert (stack_realign_drap); |
| 10453 | |
| 10454 | if (ix86_static_chain_on_stack) |
| 10455 | param_ptr_offset += UNITS_PER_WORD; |
| 10456 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
| 10457 | param_ptr_offset += UNITS_PER_WORD; |
| 10458 | |
| 10459 | insn = emit_insn (gen_rtx_SET |
| 10460 | (stack_pointer_rtx, |
| 10461 | plus_constant (Pmode, crtl->drap_reg, |
| 10462 | -param_ptr_offset))); |
| 10463 | m->fs.cfa_reg = stack_pointer_rtx; |
| 10464 | m->fs.cfa_offset = param_ptr_offset; |
| 10465 | m->fs.sp_offset = param_ptr_offset; |
| 10466 | m->fs.realigned = false; |
| 10467 | |
| 10468 | add_reg_note (insn, REG_CFA_DEF_CFA, |
| 10469 | plus_constant (Pmode, stack_pointer_rtx, |
| 10470 | param_ptr_offset)); |
| 10471 | RTX_FRAME_RELATED_P (insn) = 1; |
| 10472 | |
| 10473 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
| 10474 | ix86_emit_restore_reg_using_pop (crtl->drap_reg); |
| 10475 | } |
| 10476 | |
| 10477 | /* At this point the stack pointer must be valid, and we must have |
| 10478 | restored all of the registers. We may not have deallocated the |
| 10479 | entire stack frame. We've delayed this until now because it may |
| 10480 | be possible to merge the local stack deallocation with the |
| 10481 | deallocation forced by ix86_static_chain_on_stack. */ |
| 10482 | gcc_assert (m->fs.sp_valid); |
| 10483 | gcc_assert (!m->fs.sp_realigned); |
| 10484 | gcc_assert (!m->fs.fp_valid); |
| 10485 | gcc_assert (!m->fs.realigned); |
| 10486 | if (m->fs.sp_offset != UNITS_PER_WORD) |
| 10487 | { |
| 10488 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| 10489 | GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), |
| 10490 | style, set_cfa: true); |
| 10491 | } |
| 10492 | else |
| 10493 | ix86_add_queued_cfa_restore_notes (insn: get_last_insn ()); |
| 10494 | |
| 10495 | /* Sibcall epilogues don't want a return instruction. */ |
| 10496 | if (style == 0) |
| 10497 | { |
| 10498 | m->fs = frame_state_save; |
| 10499 | return; |
| 10500 | } |
| 10501 | |
| 10502 | if (cfun->machine->func_type != TYPE_NORMAL) |
| 10503 | emit_jump_insn (gen_interrupt_return ()); |
| 10504 | else if (crtl->args.pops_args && crtl->args.size) |
| 10505 | { |
| 10506 | rtx popc = GEN_INT (crtl->args.pops_args); |
| 10507 | |
| 10508 | /* i386 can only pop 64K bytes. If asked to pop more, pop return |
| 10509 | address, do explicit add, and jump indirectly to the caller. */ |
| 10510 | |
| 10511 | if (crtl->args.pops_args >= 65536) |
| 10512 | { |
| 10513 | rtx ecx = gen_rtx_REG (SImode, CX_REG); |
| 10514 | rtx_insn *insn; |
| 10515 | |
| 10516 | /* There is no "pascal" calling convention in any 64bit ABI. */ |
| 10517 | gcc_assert (!TARGET_64BIT); |
| 10518 | |
| 10519 | insn = emit_insn (gen_pop (arg: ecx)); |
| 10520 | m->fs.cfa_offset -= UNITS_PER_WORD; |
| 10521 | m->fs.sp_offset -= UNITS_PER_WORD; |
| 10522 | |
| 10523 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
| 10524 | x = gen_rtx_SET (stack_pointer_rtx, x); |
| 10525 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
| 10526 | add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); |
| 10527 | RTX_FRAME_RELATED_P (insn) = 1; |
| 10528 | |
| 10529 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| 10530 | offset: popc, style: -1, set_cfa: true); |
| 10531 | emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
| 10532 | } |
| 10533 | else |
| 10534 | emit_jump_insn (gen_simple_return_pop_internal (popc)); |
| 10535 | } |
| 10536 | else if (!m->call_ms2sysv || !restore_stub_is_tail) |
| 10537 | { |
| 10538 | /* In case of return from EH a simple return cannot be used |
| 10539 | as a return address will be compared with a shadow stack |
| 10540 | return address. Use indirect jump instead. */ |
| 10541 | if (style == 2 && flag_cf_protection) |
| 10542 | { |
| 10543 | /* Register used in indirect jump must be in word_mode. But |
| 10544 | Pmode may not be the same as word_mode for x32. */ |
| 10545 | rtx ecx = gen_rtx_REG (word_mode, CX_REG); |
| 10546 | rtx_insn *insn; |
| 10547 | |
| 10548 | insn = emit_insn (gen_pop (arg: ecx)); |
| 10549 | m->fs.cfa_offset -= UNITS_PER_WORD; |
| 10550 | m->fs.sp_offset -= UNITS_PER_WORD; |
| 10551 | |
| 10552 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
| 10553 | x = gen_rtx_SET (stack_pointer_rtx, x); |
| 10554 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
| 10555 | add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); |
| 10556 | RTX_FRAME_RELATED_P (insn) = 1; |
| 10557 | |
| 10558 | emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
| 10559 | } |
| 10560 | else |
| 10561 | emit_jump_insn (gen_simple_return_internal ()); |
| 10562 | } |
| 10563 | |
| 10564 | /* Restore the state back to the state from the prologue, |
| 10565 | so that it's correct for the next epilogue. */ |
| 10566 | m->fs = frame_state_save; |
| 10567 | } |
| 10568 | |
| 10569 | /* Reset from the function's potential modifications. */ |
| 10570 | |
static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  /* Compiling the function body may have renumbered
     pic_offset_table_rtx; restore the real PIC register number so the
     next function starts from a clean state.  */
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      /* Walk backwards from the last insn over trailing notes,
	 barriers and labels to see how the function body ends.  */
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
         it looks like we might want one, take special action.
         First, collect any sequence of deleted debug labels.  */
      while (insn
	     && NOTE_P (insn)
	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
	{
	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	     notes only, instead set their CODE_LABEL_NUMBER to -1,
	     otherwise there would be code generation differences
	     in between -g and -g0.  */
	  if (NOTE_P (insn) && NOTE_KIND (insn)
	      == NOTE_INSN_DELETED_DEBUG_LABEL)
	    deleted_debug_label = insn;
	  insn = PREV_INSN (insn);
	}

      /* If we have:
	 label:
	    barrier
	  then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
	insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
	{
	  if (LABEL_P (insn)
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* Trailing label.  */
	    fputs (s: "\tnop\n" , stream: file);
	  else if (cfun && ! cfun->is_thunk)
	    {
	      /* See if we have a completely empty function body, skipping
	         the special case of the picbase thunk emitted as asm.  */
	      while (insn && ! INSN_P (insn))
		insn = PREV_INSN (insn);
	      /* If we don't find any insns, we've got an empty function body;
		 I.e. completely empty - without a return or branch.  This is
		 taken as the case where a function body has been removed
		 because it contains an inline __builtin_unreachable().  GCC
		 declares that reaching __builtin_unreachable() means UB so
		 we're not obliged to do anything special; however, we want
		 non-zero-sized function bodies.  To meet this, and help the
		 user out, let's trap the case.  */
	      if (insn == NULL)
		fputs (s: "\tud2\n" , stream: file);
	    }
	}
      else if (deleted_debug_label)
	/* Nothing but notes/barriers remained: neutralize the deleted
	   debug labels so -g and -g0 produce identical assembly.  */
	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	    CODE_LABEL_NUMBER (insn) = -1;
    }
}
| 10640 | |
| 10641 | /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */ |
| 10642 | |
| 10643 | void |
| 10644 | ix86_print_patchable_function_entry (FILE *file, |
| 10645 | unsigned HOST_WIDE_INT patch_area_size, |
| 10646 | bool record_p) |
| 10647 | { |
| 10648 | if (cfun->machine->function_label_emitted) |
| 10649 | { |
| 10650 | /* NB: When ix86_print_patchable_function_entry is called after |
| 10651 | function table has been emitted, we have inserted or queued |
| 10652 | a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper |
| 10653 | place. There is nothing to do here. */ |
| 10654 | return; |
| 10655 | } |
| 10656 | |
| 10657 | default_print_patchable_function_entry (file, patch_area_size, |
| 10658 | record_p); |
| 10659 | } |
| 10660 | |
| 10661 | /* Output patchable area. NB: default_print_patchable_function_entry |
| 10662 | isn't available in i386.md. */ |
| 10663 | |
void
ix86_output_patchable_area (unsigned int patch_area_size,
			    bool record_p)
{
  /* Thin wrapper so code generated from i386.md can reach the generic
     implementation: emit PATCH_AREA_SIZE bytes of patchable area to
     asm_out_file, recording it when RECORD_P is set.  */
  default_print_patchable_function_entry (asm_out_file,
					  patch_area_size,
					  record_p);
}
| 10672 | |
| 10673 | /* Return a scratch register to use in the split stack prologue. The |
| 10674 | split stack prologue is used for -fsplit-stack. It is the first |
| 10675 | instructions in the function, even before the regular prologue. |
| 10676 | The scratch register can be any caller-saved register which is not |
| 10677 | used for parameters or for the static chain. */ |
| 10678 | |
| 10679 | static unsigned int |
| 10680 | split_stack_prologue_scratch_regno (void) |
| 10681 | { |
| 10682 | if (TARGET_64BIT) |
| 10683 | return R11_REG; |
| 10684 | else |
| 10685 | { |
| 10686 | bool is_fastcall, is_thiscall; |
| 10687 | int regparm; |
| 10688 | |
| 10689 | is_fastcall = (lookup_attribute (attr_name: "fastcall" , |
| 10690 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
| 10691 | != NULL); |
| 10692 | is_thiscall = (lookup_attribute (attr_name: "thiscall" , |
| 10693 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
| 10694 | != NULL); |
| 10695 | regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); |
| 10696 | |
| 10697 | if (is_fastcall) |
| 10698 | { |
| 10699 | if (DECL_STATIC_CHAIN (cfun->decl)) |
| 10700 | { |
| 10701 | sorry ("%<-fsplit-stack%> does not support fastcall with " |
| 10702 | "nested function" ); |
| 10703 | return INVALID_REGNUM; |
| 10704 | } |
| 10705 | return AX_REG; |
| 10706 | } |
| 10707 | else if (is_thiscall) |
| 10708 | { |
| 10709 | if (!DECL_STATIC_CHAIN (cfun->decl)) |
| 10710 | return DX_REG; |
| 10711 | return AX_REG; |
| 10712 | } |
| 10713 | else if (regparm < 3) |
| 10714 | { |
| 10715 | if (!DECL_STATIC_CHAIN (cfun->decl)) |
| 10716 | return CX_REG; |
| 10717 | else |
| 10718 | { |
| 10719 | if (regparm >= 2) |
| 10720 | { |
| 10721 | sorry ("%<-fsplit-stack%> does not support 2 register " |
| 10722 | "parameters for a nested function" ); |
| 10723 | return INVALID_REGNUM; |
| 10724 | } |
| 10725 | return DX_REG; |
| 10726 | } |
| 10727 | } |
| 10728 | else |
| 10729 | { |
| 10730 | /* FIXME: We could make this work by pushing a register |
| 10731 | around the addition and comparison. */ |
| 10732 | sorry ("%<-fsplit-stack%> does not support 3 register parameters" ); |
| 10733 | return INVALID_REGNUM; |
| 10734 | } |
| 10735 | } |
| 10736 | } |
| 10737 | |
| 10738 | /* A SYMBOL_REF for the function which allocates new stackspace for |
| 10739 | -fsplit-stack. */ |
| 10740 | |
| 10741 | static GTY(()) rtx split_stack_fn; |
| 10742 | |
| 10743 | /* A SYMBOL_REF for the more stack function when using the large model. */ |
| 10744 | |
| 10745 | static GTY(()) rtx split_stack_fn_large; |
| 10746 | |
| 10747 | /* Return location of the stack guard value in the TLS block. */ |
| 10748 | |
| 10749 | rtx |
| 10750 | ix86_split_stack_guard (void) |
| 10751 | { |
| 10752 | int offset; |
| 10753 | addr_space_t as = DEFAULT_TLS_SEG_REG; |
| 10754 | rtx r; |
| 10755 | |
| 10756 | gcc_assert (flag_split_stack); |
| 10757 | |
| 10758 | #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET |
| 10759 | offset = TARGET_THREAD_SPLIT_STACK_OFFSET; |
| 10760 | #else |
| 10761 | gcc_unreachable (); |
| 10762 | #endif |
| 10763 | |
| 10764 | r = GEN_INT (offset); |
| 10765 | r = gen_const_mem (Pmode, r); |
| 10766 | set_mem_addr_space (r, as); |
| 10767 | |
| 10768 | return r; |
| 10769 | } |
| 10770 | |
| 10771 | /* Handle -fsplit-stack. These are the first instructions in the |
| 10772 | function, even before the regular prologue. */ |
| 10773 | |
| 10774 | void |
| 10775 | ix86_expand_split_stack_prologue (void) |
| 10776 | { |
| 10777 | HOST_WIDE_INT allocate; |
| 10778 | unsigned HOST_WIDE_INT args_size; |
| 10779 | rtx_code_label *label; |
| 10780 | rtx limit, current, allocate_rtx, call_fusage; |
| 10781 | rtx_insn *call_insn; |
| 10782 | unsigned int scratch_regno = INVALID_REGNUM; |
| 10783 | rtx scratch_reg = NULL_RTX; |
| 10784 | rtx_code_label *varargs_label = NULL; |
| 10785 | rtx fn; |
| 10786 | |
| 10787 | gcc_assert (flag_split_stack && reload_completed); |
| 10788 | |
| 10789 | ix86_finalize_stack_frame_flags (); |
| 10790 | struct ix86_frame &frame = cfun->machine->frame; |
| 10791 | allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; |
| 10792 | |
| 10793 | /* This is the label we will branch to if we have enough stack |
| 10794 | space. We expect the basic block reordering pass to reverse this |
| 10795 | branch if optimizing, so that we branch in the unlikely case. */ |
| 10796 | label = gen_label_rtx (); |
| 10797 | |
| 10798 | /* We need to compare the stack pointer minus the frame size with |
| 10799 | the stack boundary in the TCB. The stack boundary always gives |
| 10800 | us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we |
| 10801 | can compare directly. Otherwise we need to do an addition. */ |
| 10802 | |
| 10803 | limit = ix86_split_stack_guard (); |
| 10804 | |
| 10805 | if (allocate >= SPLIT_STACK_AVAILABLE |
| 10806 | || flag_force_indirect_call) |
| 10807 | { |
| 10808 | scratch_regno = split_stack_prologue_scratch_regno (); |
| 10809 | if (scratch_regno == INVALID_REGNUM) |
| 10810 | return; |
| 10811 | } |
| 10812 | |
| 10813 | if (allocate >= SPLIT_STACK_AVAILABLE) |
| 10814 | { |
| 10815 | rtx offset; |
| 10816 | |
| 10817 | /* We need a scratch register to hold the stack pointer minus |
| 10818 | the required frame size. Since this is the very start of the |
| 10819 | function, the scratch register can be any caller-saved |
| 10820 | register which is not used for parameters. */ |
| 10821 | offset = GEN_INT (- allocate); |
| 10822 | |
| 10823 | scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
| 10824 | if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) |
| 10825 | { |
| 10826 | /* We don't use gen_add in this case because it will |
| 10827 | want to split to lea, but when not optimizing the insn |
| 10828 | will not be split after this point. */ |
| 10829 | emit_insn (gen_rtx_SET (scratch_reg, |
| 10830 | gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| 10831 | offset))); |
| 10832 | } |
| 10833 | else |
| 10834 | { |
| 10835 | emit_move_insn (scratch_reg, offset); |
| 10836 | emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx)); |
| 10837 | } |
| 10838 | current = scratch_reg; |
| 10839 | } |
| 10840 | else |
| 10841 | current = stack_pointer_rtx; |
| 10842 | |
| 10843 | ix86_expand_branch (GEU, current, limit, label); |
| 10844 | rtx_insn *jump_insn = get_last_insn (); |
| 10845 | JUMP_LABEL (jump_insn) = label; |
| 10846 | |
| 10847 | /* Mark the jump as very likely to be taken. */ |
| 10848 | add_reg_br_prob_note (jump_insn, profile_probability::very_likely ()); |
| 10849 | |
| 10850 | if (split_stack_fn == NULL_RTX) |
| 10851 | { |
| 10852 | split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack" ); |
| 10853 | SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL; |
| 10854 | } |
| 10855 | fn = split_stack_fn; |
| 10856 | |
| 10857 | /* Get more stack space. We pass in the desired stack space and the |
| 10858 | size of the arguments to copy to the new stack. In 32-bit mode |
| 10859 | we push the parameters; __morestack will return on a new stack |
| 10860 | anyhow. In 64-bit mode we pass the parameters in r10 and |
| 10861 | r11. */ |
| 10862 | allocate_rtx = GEN_INT (allocate); |
| 10863 | args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0; |
| 10864 | call_fusage = NULL_RTX; |
| 10865 | rtx pop = NULL_RTX; |
| 10866 | if (TARGET_64BIT) |
| 10867 | { |
| 10868 | rtx reg10, reg11; |
| 10869 | |
| 10870 | reg10 = gen_rtx_REG (DImode, R10_REG); |
| 10871 | reg11 = gen_rtx_REG (DImode, R11_REG); |
| 10872 | |
| 10873 | /* If this function uses a static chain, it will be in %r10. |
| 10874 | Preserve it across the call to __morestack. */ |
| 10875 | if (DECL_STATIC_CHAIN (cfun->decl)) |
| 10876 | { |
| 10877 | rtx rax; |
| 10878 | |
| 10879 | rax = gen_rtx_REG (word_mode, AX_REG); |
| 10880 | emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); |
| 10881 | use_reg (fusage: &call_fusage, reg: rax); |
| 10882 | } |
| 10883 | |
| 10884 | if (flag_force_indirect_call |
| 10885 | || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) |
| 10886 | { |
| 10887 | HOST_WIDE_INT argval; |
| 10888 | |
| 10889 | if (split_stack_fn_large == NULL_RTX) |
| 10890 | { |
| 10891 | split_stack_fn_large |
| 10892 | = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model" ); |
| 10893 | SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; |
| 10894 | } |
| 10895 | |
| 10896 | fn = split_stack_fn_large; |
| 10897 | |
| 10898 | if (ix86_cmodel == CM_LARGE_PIC) |
| 10899 | { |
| 10900 | rtx_code_label *label; |
| 10901 | rtx x; |
| 10902 | |
| 10903 | gcc_assert (Pmode == DImode); |
| 10904 | |
| 10905 | label = gen_label_rtx (); |
| 10906 | emit_label (label); |
| 10907 | LABEL_PRESERVE_P (label) = 1; |
| 10908 | emit_insn (gen_set_rip_rex64 (reg10, label)); |
| 10909 | emit_insn (gen_set_got_offset_rex64 (reg11, label)); |
| 10910 | emit_insn (gen_add2_insn (reg10, reg11)); |
| 10911 | x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT); |
| 10912 | x = gen_rtx_CONST (Pmode, x); |
| 10913 | emit_move_insn (reg11, x); |
| 10914 | x = gen_rtx_PLUS (Pmode, reg10, reg11); |
| 10915 | x = gen_const_mem (Pmode, x); |
| 10916 | fn = copy_to_suggested_reg (x, reg11, Pmode); |
| 10917 | } |
| 10918 | else if (ix86_cmodel == CM_LARGE) |
| 10919 | fn = copy_to_suggested_reg (fn, reg11, Pmode); |
| 10920 | |
| 10921 | /* When using the large model we need to load the address |
| 10922 | into a register, and we've run out of registers. So we |
| 10923 | switch to a different calling convention, and we call a |
| 10924 | different function: __morestack_large. We pass the |
| 10925 | argument size in the upper 32 bits of r10 and pass the |
| 10926 | frame size in the lower 32 bits. */ |
| 10927 | gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate); |
| 10928 | gcc_assert ((args_size & 0xffffffff) == args_size); |
| 10929 | |
| 10930 | argval = ((args_size << 16) << 16) + allocate; |
| 10931 | emit_move_insn (reg10, GEN_INT (argval)); |
| 10932 | } |
| 10933 | else |
| 10934 | { |
| 10935 | emit_move_insn (reg10, allocate_rtx); |
| 10936 | emit_move_insn (reg11, GEN_INT (args_size)); |
| 10937 | use_reg (fusage: &call_fusage, reg: reg11); |
| 10938 | } |
| 10939 | |
| 10940 | use_reg (fusage: &call_fusage, reg: reg10); |
| 10941 | } |
| 10942 | else |
| 10943 | { |
| 10944 | if (flag_force_indirect_call && flag_pic) |
| 10945 | { |
| 10946 | rtx x; |
| 10947 | |
| 10948 | gcc_assert (Pmode == SImode); |
| 10949 | |
| 10950 | scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
| 10951 | |
| 10952 | emit_insn (gen_set_got (scratch_reg)); |
| 10953 | x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn), |
| 10954 | UNSPEC_GOT); |
| 10955 | x = gen_rtx_CONST (Pmode, x); |
| 10956 | x = gen_rtx_PLUS (Pmode, scratch_reg, x); |
| 10957 | x = gen_const_mem (Pmode, x); |
| 10958 | fn = copy_to_suggested_reg (x, scratch_reg, Pmode); |
| 10959 | } |
| 10960 | |
| 10961 | rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size))); |
| 10962 | add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD)); |
| 10963 | insn = emit_insn (gen_push (arg: allocate_rtx)); |
| 10964 | add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD)); |
| 10965 | pop = GEN_INT (2 * UNITS_PER_WORD); |
| 10966 | } |
| 10967 | |
| 10968 | if (flag_force_indirect_call && !register_operand (fn, VOIDmode)) |
| 10969 | { |
| 10970 | scratch_reg = gen_rtx_REG (word_mode, scratch_regno); |
| 10971 | |
| 10972 | if (GET_MODE (fn) != word_mode) |
| 10973 | fn = gen_rtx_ZERO_EXTEND (word_mode, fn); |
| 10974 | |
| 10975 | fn = copy_to_suggested_reg (fn, scratch_reg, word_mode); |
| 10976 | } |
| 10977 | |
| 10978 | call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), |
| 10979 | GEN_INT (UNITS_PER_WORD), constm1_rtx, |
| 10980 | pop, false); |
| 10981 | add_function_usage_to (call_insn, call_fusage); |
| 10982 | if (!TARGET_64BIT) |
| 10983 | add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0)); |
| 10984 | /* Indicate that this function can't jump to non-local gotos. */ |
| 10985 | make_reg_eh_region_note_nothrow_nononlocal (call_insn); |
| 10986 | |
| 10987 | /* In order to make call/return prediction work right, we now need |
| 10988 | to execute a return instruction. See |
| 10989 | libgcc/config/i386/morestack.S for the details on how this works. |
| 10990 | |
| 10991 | For flow purposes gcc must not see this as a return |
| 10992 | instruction--we need control flow to continue at the subsequent |
| 10993 | label. Therefore, we use an unspec. */ |
| 10994 | gcc_assert (crtl->args.pops_args < 65536); |
| 10995 | rtx_insn *ret_insn |
| 10996 | = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); |
| 10997 | |
| 10998 | if ((flag_cf_protection & CF_BRANCH)) |
| 10999 | { |
| 11000 | /* Insert ENDBR since __morestack will jump back here via indirect |
| 11001 | call. */ |
| 11002 | rtx cet_eb = gen_nop_endbr (); |
| 11003 | emit_insn_after (cet_eb, ret_insn); |
| 11004 | } |
| 11005 | |
| 11006 | /* If we are in 64-bit mode and this function uses a static chain, |
| 11007 | we saved %r10 in %rax before calling _morestack. */ |
| 11008 | if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) |
| 11009 | emit_move_insn (gen_rtx_REG (word_mode, R10_REG), |
| 11010 | gen_rtx_REG (word_mode, AX_REG)); |
| 11011 | |
| 11012 | /* If this function calls va_start, we need to store a pointer to |
| 11013 | the arguments on the old stack, because they may not have been |
| 11014 | all copied to the new stack. At this point the old stack can be |
| 11015 | found at the frame pointer value used by __morestack, because |
| 11016 | __morestack has set that up before calling back to us. Here we |
| 11017 | store that pointer in a scratch register, and in |
| 11018 | ix86_expand_prologue we store the scratch register in a stack |
| 11019 | slot. */ |
| 11020 | if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
| 11021 | { |
| 11022 | rtx frame_reg; |
| 11023 | int words; |
| 11024 | |
| 11025 | scratch_regno = split_stack_prologue_scratch_regno (); |
| 11026 | scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
| 11027 | frame_reg = gen_rtx_REG (Pmode, BP_REG); |
| 11028 | |
| 11029 | /* 64-bit: |
| 11030 | fp -> old fp value |
| 11031 | return address within this function |
| 11032 | return address of caller of this function |
| 11033 | stack arguments |
| 11034 | So we add three words to get to the stack arguments. |
| 11035 | |
| 11036 | 32-bit: |
| 11037 | fp -> old fp value |
| 11038 | return address within this function |
| 11039 | first argument to __morestack |
| 11040 | second argument to __morestack |
| 11041 | return address of caller of this function |
| 11042 | stack arguments |
| 11043 | So we add five words to get to the stack arguments. |
| 11044 | */ |
| 11045 | words = TARGET_64BIT ? 3 : 5; |
| 11046 | emit_insn (gen_rtx_SET (scratch_reg, |
| 11047 | plus_constant (Pmode, frame_reg, |
| 11048 | words * UNITS_PER_WORD))); |
| 11049 | |
| 11050 | varargs_label = gen_label_rtx (); |
| 11051 | emit_jump_insn (gen_jump (varargs_label)); |
| 11052 | JUMP_LABEL (get_last_insn ()) = varargs_label; |
| 11053 | |
| 11054 | emit_barrier (); |
| 11055 | } |
| 11056 | |
| 11057 | emit_label (label); |
| 11058 | LABEL_NUSES (label) = 1; |
| 11059 | |
| 11060 | /* If this function calls va_start, we now have to set the scratch |
| 11061 | register for the case where we do not call __morestack. In this |
| 11062 | case we need to set it based on the stack pointer. */ |
| 11063 | if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
| 11064 | { |
| 11065 | emit_insn (gen_rtx_SET (scratch_reg, |
| 11066 | plus_constant (Pmode, stack_pointer_rtx, |
| 11067 | UNITS_PER_WORD))); |
| 11068 | |
| 11069 | emit_label (varargs_label); |
| 11070 | LABEL_NUSES (varargs_label) = 1; |
| 11071 | } |
| 11072 | } |
| 11073 | |
| 11074 | /* We may have to tell the dataflow pass that the split stack prologue |
| 11075 | is initializing a scratch register. */ |
| 11076 | |
| 11077 | static void |
| 11078 | ix86_live_on_entry (bitmap regs) |
| 11079 | { |
| 11080 | if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
| 11081 | { |
| 11082 | gcc_assert (flag_split_stack); |
| 11083 | bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); |
| 11084 | } |
| 11085 | } |
| 11086 | |
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  On success, fill *OUT with the base, index, scale,
   displacement and segment of the address.  */

bool
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  addr_space_t seg = ADDR_SPACE_GENERIC;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  /* A bare constant inside the ZERO_EXTEND is not an address.  */
	  if (CONST_INT_P (addr))
	    return false;
	}
      else if (GET_CODE (addr) == AND)
	{
	  rtx mask = XEXP (addr, 1);
	  rtx shift_val;

	  /* (and:DI x 0xffffffff) is the non-canonical spelling of a
	     zero-extend from SImode; accept it here.  */
	  if (const_32bit_mask (mask, DImode)
	      /* For ASHIFT inside AND, combine will not generate
		 canonical zero-extend.  Merge mask for AND and shift_count
		 to check if it is canonical zero-extend.  */
	      || (CONST_INT_P (mask)
		  && GET_CODE (XEXP (addr, 0)) == ASHIFT
		  && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
		  && ((UINTVAL (mask)
		       | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
		      == HOST_WIDE_INT_UC (0xffffffff))))
	    {
	      addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
	      if (addr == NULL_RTX)
		return false;

	      if (CONST_INT_P (addr))
		return false;
	    }
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (SUBREG_P (addr)
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return false;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (SUBREG_P (addr))
    {
      if (REG_P (SUBREG_REG (addr)))
	base = addr;
      else
	return false;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten the (left-leaning) PLUS chain into at most four addends:
	 base + index*scale + disp plus one spare for a segment unspec.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return false;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return false;
      addends[n] = op;

      /* Classify each addend; walk innermost-first so the leftmost
	 register is preferred as the base.  */
      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      /* A left shift by 0..3 is index*1/2/4/8.  */
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return false;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return false;
	      scale = 1 << scale;
	      break;

	    case ZERO_EXTEND:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return false;
	      /* FALLTHRU */

	    case UNSPEC:
	      /* The thread pointer becomes an %fs/%gs segment override
		 when direct TLS segment references are allowed.  */
	      if (XINT (op, 1) == UNSPEC_TP
	          && TARGET_TLS_DIRECT_SEG_REFS
	          && seg == ADDR_SPACE_GENERIC)
	        seg = DEFAULT_TLS_SEG_REG;
	      else
		return false;
	      break;

	    case SUBREG:
	      if (!REG_P (SUBREG_REG (op)))
		return false;
	      /* FALLTHRU */

	    case REG:
	      /* First register is the base, second the index; a third
		 register does not fit the x86 addressing modes.  */
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return false;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return false;
	      disp = op;
	      break;

	    default:
	      return false;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return false;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return false;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* The index must be a (possibly SUBREGed) register.  */
  if (index)
    {
      if (REG_P (index))
	;
      else if (SUBREG_P (index)
	       && REG_P (SUBREG_REG (index)))
	;
      else
	return false;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return false;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (REGNO (index_reg) == ARG_POINTER_REGNUM
	  || REGNO (index_reg) == FRAME_POINTER_REGNUM
	  || REGNO (index_reg) == SP_REG))
    {
      /* %esp/%rsp cannot be encoded as an index, so make it the base.  */
      std::swap (a&: base, b&: index);
      std::swap (a&: base_reg, b&: index_reg);
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp && base_reg
      && (REGNO (base_reg) == ARG_POINTER_REGNUM
	  || REGNO (base_reg) == FRAME_POINTER_REGNUM
	  || REGNO (base_reg) == BP_REG
	  || REGNO (base_reg) == R13_REG))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return true;
}
| 11336 | |
| 11337 | /* Return cost of the memory address x. |
| 11338 | For i386, it is better to use a complex address than let gcc copy |
| 11339 | the address into a reg and make a new pseudo. But not if the address |
| 11340 | requires to two regs - that would mean more pseudos with longer |
| 11341 | lifetimes. */ |
| 11342 | static int |
| 11343 | ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) |
| 11344 | { |
| 11345 | struct ix86_address parts; |
| 11346 | int cost = 1; |
| 11347 | int ok = ix86_decompose_address (addr: x, out: &parts); |
| 11348 | |
| 11349 | gcc_assert (ok); |
| 11350 | |
| 11351 | if (parts.base && SUBREG_P (parts.base)) |
| 11352 | parts.base = SUBREG_REG (parts.base); |
| 11353 | if (parts.index && SUBREG_P (parts.index)) |
| 11354 | parts.index = SUBREG_REG (parts.index); |
| 11355 | |
| 11356 | /* Attempt to minimize number of registers in the address by increasing |
| 11357 | address cost for each used register. We don't increase address cost |
| 11358 | for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx" |
| 11359 | is not invariant itself it most likely means that base or index is not |
| 11360 | invariant. Therefore only "pic_offset_table_rtx" could be hoisted out, |
| 11361 | which is not profitable for x86. */ |
| 11362 | if (parts.base |
| 11363 | && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) |
| 11364 | && (current_pass->type == GIMPLE_PASS |
| 11365 | || !pic_offset_table_rtx |
| 11366 | || !REG_P (parts.base) |
| 11367 | || REGNO (pic_offset_table_rtx) != REGNO (parts.base))) |
| 11368 | cost++; |
| 11369 | |
| 11370 | if (parts.index |
| 11371 | && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) |
| 11372 | && (current_pass->type == GIMPLE_PASS |
| 11373 | || !pic_offset_table_rtx |
| 11374 | || !REG_P (parts.index) |
| 11375 | || REGNO (pic_offset_table_rtx) != REGNO (parts.index))) |
| 11376 | cost++; |
| 11377 | |
| 11378 | /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, |
| 11379 | since it's predecode logic can't detect the length of instructions |
| 11380 | and it degenerates to vector decoded. Increase cost of such |
| 11381 | addresses here. The penalty is minimally 2 cycles. It may be worthwhile |
| 11382 | to split such addresses or even refuse such addresses at all. |
| 11383 | |
| 11384 | Following addressing modes are affected: |
| 11385 | [base+scale*index] |
| 11386 | [scale*index+disp] |
| 11387 | [base+index] |
| 11388 | |
| 11389 | The first and last case may be avoidable by explicitly coding the zero in |
| 11390 | memory address, but I don't have AMD-K6 machine handy to check this |
| 11391 | theory. */ |
| 11392 | |
| 11393 | if (TARGET_CPU_P (K6) |
| 11394 | && ((!parts.disp && parts.base && parts.index && parts.scale != 1) |
| 11395 | || (parts.disp && !parts.base && parts.index && parts.scale != 1) |
| 11396 | || (!parts.disp && parts.base && parts.index && parts.scale == 1))) |
| 11397 | cost += 10; |
| 11398 | |
| 11399 | return cost; |
| 11400 | } |
| 11401 | |
| 11402 | /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ |
| 11403 | |
| 11404 | bool |
| 11405 | ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, |
| 11406 | unsigned int align, |
| 11407 | enum by_pieces_operation op, |
| 11408 | bool speed_p) |
| 11409 | { |
| 11410 | /* Return true when we are currently expanding memcpy/memset epilogue |
| 11411 | with move_by_pieces or store_by_pieces. */ |
| 11412 | if (cfun->machine->by_pieces_in_use) |
| 11413 | return true; |
| 11414 | |
| 11415 | return default_use_by_pieces_infrastructure_p (size, align, op, |
| 11416 | speed_p); |
| 11417 | } |
| 11418 | |
| 11419 | /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as |
| 11420 | this is used for to form addresses to local data when -fPIC is in |
| 11421 | use. */ |
| 11422 | |
| 11423 | static bool |
| 11424 | darwin_local_data_pic (rtx disp) |
| 11425 | { |
| 11426 | return (GET_CODE (disp) == UNSPEC |
| 11427 | && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); |
| 11428 | } |
| 11429 | |
| 11430 | /* True if the function symbol operand X should be loaded from GOT. |
| 11431 | If CALL_P is true, X is a call operand. |
| 11432 | |
| 11433 | NB: -mno-direct-extern-access doesn't force load from GOT for |
| 11434 | call. |
| 11435 | |
| 11436 | NB: In 32-bit mode, only non-PIC is allowed in inline assembly |
| 11437 | statements, since a PIC register could not be available at the |
| 11438 | call site. */ |
| 11439 | |
| 11440 | bool |
| 11441 | ix86_force_load_from_GOT_p (rtx x, bool call_p) |
| 11442 | { |
| 11443 | return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X)) |
| 11444 | && !TARGET_PECOFF && !TARGET_MACHO |
| 11445 | && (!flag_pic || this_is_asm_operands) |
| 11446 | && ix86_cmodel != CM_LARGE |
| 11447 | && ix86_cmodel != CM_LARGE_PIC |
| 11448 | && SYMBOL_REF_P (x) |
| 11449 | && ((!call_p |
| 11450 | && (!ix86_direct_extern_access |
| 11451 | || (SYMBOL_REF_DECL (x) |
| 11452 | && lookup_attribute (attr_name: "nodirect_extern_access" , |
| 11453 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))) |
| 11454 | || (SYMBOL_REF_FUNCTION_P (x) |
| 11455 | && (!flag_plt |
| 11456 | || (SYMBOL_REF_DECL (x) |
| 11457 | && lookup_attribute (attr_name: "noplt" , |
| 11458 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))) |
| 11459 | && !SYMBOL_REF_LOCAL_P (x)); |
| 11460 | } |
| 11461 | |
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  Implements TARGET_LEGITIMATE_CONSTANT_P for
   MODE; constants rejected here must be loaded another way (e.g. from
   the constant pool or through the GOT).  */

static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      /* Strip an optional integer offset: (const (plus SYM INT)).  */
      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (disp: x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    /* Local-exec TLS offsets are link-time constants.  */
	    x = XVECEXP (x, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    /* Local-dynamic TLS offsets relative to the module base.  */
	    x = XVECEXP (x, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  case UNSPEC_SECREL32:
	    x = XVECEXP (x, 0, 0);
	    return SYMBOL_REF_P (x);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (LABEL_REF_P (x))
	return true;
      if (!SYMBOL_REF_P (x))
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
	 via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
	return false;

      break;

    CASE_CONST_SCALAR_INT:
      /* An immediate that encodes an ENDBR opcode could be abused to
	 forge an indirect-branch target; reject it.  */
      if (ix86_endbr_immediate_operand (x, VOIDmode))
	return false;

      switch (mode)
	{
	case E_TImode:
	  if (TARGET_64BIT)
	    return true;
	  /* FALLTHRU */
	case E_OImode:
	case E_XImode:
	  /* Wide integer constants are only OK if they are standard SSE
	     constants or fit the widest available vector move.  */
	  if (!standard_sse_constant_p (x, pred_mode: mode)
	      && GET_MODE_SIZE (TARGET_AVX512F
				? XImode
				: (TARGET_AVX
				   ? OImode
				   : (TARGET_SSE2
				      ? TImode : DImode))) < GET_MODE_SIZE (mode))
	    return false;
	default:
	  break;
	}
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x, pred_mode: mode))
	return false;
      break;

    case CONST_DOUBLE:
      /* There is no BFmode immediate move; force it to memory.  */
      if (mode == E_BFmode)
	return false;
      /* FALLTHRU */

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
| 11578 | |
| 11579 | /* Determine if it's legal to put X into the constant pool. This |
| 11580 | is not possible for the address of thread-local symbols, which |
| 11581 | is checked above. */ |
| 11582 | |
| 11583 | static bool |
| 11584 | ix86_cannot_force_const_mem (machine_mode mode, rtx x) |
| 11585 | { |
| 11586 | /* We can put any immediate constant in memory. */ |
| 11587 | switch (GET_CODE (x)) |
| 11588 | { |
| 11589 | CASE_CONST_ANY: |
| 11590 | return false; |
| 11591 | |
| 11592 | default: |
| 11593 | break; |
| 11594 | } |
| 11595 | |
| 11596 | return !ix86_legitimate_constant_p (mode, x); |
| 11597 | } |
| 11598 | |
| 11599 | /* Return a unique alias set for the GOT. */ |
| 11600 | |
| 11601 | alias_set_type |
| 11602 | ix86_GOT_alias_set (void) |
| 11603 | { |
| 11604 | static alias_set_type set = -1; |
| 11605 | if (set == -1) |
| 11606 | set = new_alias_set (); |
| 11607 | return set; |
| 11608 | } |
| 11609 | |
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      /* Look through (const ...) and an optional integer offset.  */
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    /* Local-exec TLS offsets are position independent.  */
	    x = XVECEXP (inner, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_SECREL32:
	    x = XVECEXP (inner, 0, 0);
	    return SYMBOL_REF_P (x);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* Not an unspec: treat like a plain symbolic constant.  */
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      /* Non-symbolic constants are always fine under PIC.  */
      return true;
    }
}
| 11657 | |
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1))
	    break;
	  /* TLS offsets (dtpoff/ntpoff) plus an offset are OK as long
	     as the combined value fits in a sign-extended 32 bits.  */
	  if (GET_CODE (op0) == UNSPEC
	      && (XINT (op0, 1) == UNSPEC_DTPOFF
		  || XINT (op0, 1) == UNSPEC_NTPOFF)
	      && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
	    return true;
	  /* Cap the offset to +-16MB so symbol+offset stays safely
	     within 32-bit RIP-relative reach.  */
	  if (INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (LABEL_REF_P (op0))
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (!SYMBOL_REF_P (op0))
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol needs always to be resolved.  */
	  if (SYMBOL_REF_TLS_MODEL (op0)
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
	    return false;

	  if (TARGET_PECOFF)
	    {
#if TARGET_PECOFF
	      /* Imported symbols are addressed via the import table.  */
	      if (is_imported_p (op0))
		return true;
#endif

	      if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
		break;

	      /* Non-external-weak function symbols need to be resolved only
		 for the large model.  Non-external symbols don't need to be
		 resolved for large and medium models.  For the small model,
		 we don't need to resolve anything here.  */
	      if ((ix86_cmodel != CM_LARGE_PIC
		   && SYMBOL_REF_FUNCTION_P (op0)
		   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
		  || !SYMBOL_REF_EXTERNAL_P (op0)
		  || ix86_cmodel == CM_SMALL_PIC)
		return true;
	    }
	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
		   && (SYMBOL_REF_LOCAL_P (op0)
		       /* With PIE copy relocations, a direct reference to a
			  non-weak data symbol is also acceptable unless the
			  symbol opts out of direct external access.  */
		       || ((ix86_direct_extern_access
			    && !(SYMBOL_REF_DECL (op0)
				 && lookup_attribute (attr_name: "nodirect_extern_access",
						      DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
			   && HAVE_LD_PIE_COPYRELOC
			   && flag_pie
			   && !SYMBOL_REF_WEAK (op0)
			   && !SYMBOL_REF_FUNCTION_P (op0)))
		   && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  /* Anything else must be wrapped in (const ...) to be a PIC
     displacement.  */
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
	  && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
	return false;
      return true;
    }

  /* 32-bit mode: strip an optional integer offset, then inspect the
     unspec wrapper.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      /* GOT slot references cannot carry an addend.  */
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
	      || LABEL_REF_P (XVECEXP (disp, 0, 0)));
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While ABI specify also 32bit relocation but we don't produce it in
	 small PIC model at all.  */
      if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
	   || LABEL_REF_P (XVECEXP (disp, 0, 0)))
	  && !TARGET_64BIT)
	return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      /* Initial-exec TLS GOT entries; no addend allowed.  */
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    case UNSPEC_SECREL32:
      disp = XVECEXP (disp, 0, 0);
      return SYMBOL_REF_P (disp);
    }

  return false;
}
| 11829 | |
| 11830 | /* Determine if op is suitable RTX for an address register. |
| 11831 | Return naked register if a register or a register subreg is |
| 11832 | found, otherwise return NULL_RTX. */ |
| 11833 | |
| 11834 | static rtx |
| 11835 | ix86_validate_address_register (rtx op) |
| 11836 | { |
| 11837 | machine_mode mode = GET_MODE (op); |
| 11838 | |
| 11839 | /* Only SImode or DImode registers can form the address. */ |
| 11840 | if (mode != SImode && mode != DImode) |
| 11841 | return NULL_RTX; |
| 11842 | |
| 11843 | if (REG_P (op)) |
| 11844 | return op; |
| 11845 | else if (SUBREG_P (op)) |
| 11846 | { |
| 11847 | rtx reg = SUBREG_REG (op); |
| 11848 | |
| 11849 | if (!REG_P (reg)) |
| 11850 | return NULL_RTX; |
| 11851 | |
| 11852 | mode = GET_MODE (reg); |
| 11853 | |
| 11854 | /* Don't allow SUBREGs that span more than a word. It can |
| 11855 | lead to spill failures when the register is one word out |
| 11856 | of a two word structure. */ |
| 11857 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 11858 | return NULL_RTX; |
| 11859 | |
| 11860 | /* Allow only SUBREGs of non-eliminable hard registers. */ |
| 11861 | if (register_no_elim_operand (reg, mode)) |
| 11862 | return reg; |
| 11863 | } |
| 11864 | |
| 11865 | /* Op is not a register. */ |
| 11866 | return NULL_RTX; |
| 11867 | } |
| 11868 | |
/* Determine which memory address register set insn can use.
   With APX, insns may be restricted to the low 8 or 16 GPRs
   (ADDR_GPR8/ADDR_GPR16) rather than the full 32 (ADDR_GPR32).  */

static enum attr_addr
ix86_memory_address_reg_class (rtx_insn* insn)
{
  /* LRA can do some initialization with NULL insn,
     return maximum register class in this case.  */
  enum attr_addr addr_rclass = ADDR_GPR32;

  if (!insn)
    return addr_rclass;

  /* For inline asm the choice is governed by the
     ix86_apx_inline_asm_use_gpr32 option.  */
  if (asm_noperands (PATTERN (insn)) >= 0
      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
    return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;

  /* Return maximum register class for unrecognized instructions.  */
  if (INSN_CODE (insn) < 0)
    return addr_rclass;

  /* Try to recognize the insn before calling get_attr_addr.
     Save current recog_data and current alternative.  */
  struct recog_data_d saved_recog_data = recog_data;
  int saved_alternative = which_alternative;

  /* Update recog_data for processing of alternatives.  */
  extract_insn_cached (insn);

  /* If current alternative is not set, loop through enabled
     alternatives and get the most limited register class.  */
  if (saved_alternative == -1)
    {
      alternative_mask enabled = get_enabled_alternatives (insn);

      for (int i = 0; i < recog_data.n_alternatives; i++)
	{
	  if (!TEST_BIT (enabled, i))
	    continue;

	  which_alternative = i;
	  /* Smaller attr_addr values are more restrictive; keep the
	     minimum over all enabled alternatives.  */
	  addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
	}
    }
  else
    {
      which_alternative = saved_alternative;
      addr_rclass = get_attr_addr (insn);
    }

  /* Restore the global recognition state clobbered by
     extract_insn_cached and the alternative walk above.  */
  recog_data = saved_recog_data;
  which_alternative = saved_alternative;

  return addr_rclass;
}
| 11923 | |
| 11924 | /* Return memory address register class insn can use. */ |
| 11925 | |
| 11926 | enum reg_class |
| 11927 | ix86_insn_base_reg_class (rtx_insn* insn) |
| 11928 | { |
| 11929 | switch (ix86_memory_address_reg_class (insn)) |
| 11930 | { |
| 11931 | case ADDR_GPR8: |
| 11932 | return LEGACY_GENERAL_REGS; |
| 11933 | case ADDR_GPR16: |
| 11934 | return GENERAL_GPR16; |
| 11935 | case ADDR_GPR32: |
| 11936 | break; |
| 11937 | default: |
| 11938 | gcc_unreachable (); |
| 11939 | } |
| 11940 | |
| 11941 | return BASE_REG_CLASS; |
| 11942 | } |
| 11943 | |
| 11944 | bool |
| 11945 | ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn) |
| 11946 | { |
| 11947 | switch (ix86_memory_address_reg_class (insn)) |
| 11948 | { |
| 11949 | case ADDR_GPR8: |
| 11950 | return LEGACY_INT_REGNO_P (regno); |
| 11951 | case ADDR_GPR16: |
| 11952 | return GENERAL_GPR16_REGNO_P (regno); |
| 11953 | case ADDR_GPR32: |
| 11954 | break; |
| 11955 | default: |
| 11956 | gcc_unreachable (); |
| 11957 | } |
| 11958 | |
| 11959 | return GENERAL_REGNO_P (regno); |
| 11960 | } |
| 11961 | |
| 11962 | enum reg_class |
| 11963 | ix86_insn_index_reg_class (rtx_insn* insn) |
| 11964 | { |
| 11965 | switch (ix86_memory_address_reg_class (insn)) |
| 11966 | { |
| 11967 | case ADDR_GPR8: |
| 11968 | return LEGACY_INDEX_REGS; |
| 11969 | case ADDR_GPR16: |
| 11970 | return INDEX_GPR16; |
| 11971 | case ADDR_GPR32: |
| 11972 | break; |
| 11973 | default: |
| 11974 | gcc_unreachable (); |
| 11975 | } |
| 11976 | |
| 11977 | return INDEX_REG_CLASS; |
| 11978 | } |
| 11979 | |
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   STRICT selects between the strict register checks
   (REGNO_OK_FOR_BASE_P / REGNO_OK_FOR_INDEX_P) and the non-strict
   variants.  Returns true if ADDR is a valid address.  */

static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
			   code_helper = ERROR_MARK)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, out: &parts) == 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (op: base);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_BASE_P (regno))
	  || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (op: index);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
	  || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      /* ENDBR immediates are never allowed as displacements
	 (see ix86_endbr_immediate_operand).  */
      if (ix86_endbr_immediate_operand (disp, VOIDmode))
	return false;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While ABI specify also 32bit relocations, we
	     don't produce them at all and use IP relative instead.
	     Allow GOT in 32bit mode for both PIC and non-PIC if symbol
	     should be loaded via GOT.  */
	  case UNSPEC_GOT:
	    if (!TARGET_64BIT
		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  /* TLS and section-relative unspecs are acceptable
	     displacements.  */
	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	  case UNSPEC_SECREL32:
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
#if TARGET_MACHO
		   || (MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp))
#endif
		   ))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacement must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (!LABEL_REF_P (disp)
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, x: disp))
	       && (!SYMBOL_REF_P (disp)
		   || !ix86_legitimate_constant_p (Pmode, x: disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
	       && CONST_INT_P (disp)
	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
| 12189 | |
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  /* A valid constant address must be CONSTANT_P and additionally pass
     the strict Pmode address check.  */
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, addr: x, strict: 1);
}
| 12197 | |
| 12198 | |
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      /* dllimport'ed symbols are accessed via the import table.  */
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
#endif
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (disp: addr))
    /* Already a legitimate PIC displacement; use it unchanged.  */
    new_rtx = addr;
  else if ((!TARGET_64BIT
	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
	   && !TARGET_PECOFF
	   && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
	 from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);

      if (GET_CODE (addr) == PLUS)
	{
	  /* Wrap only the symbol part in the @GOTOFF unspec, keeping
	     the constant offset outside.  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      if (reg != 0)
	{
	  gcc_assert (REG_P (reg));
	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					 new_rtx, reg, 1, OPTAB_DIRECT);
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't always use @GOTOFF for text labels
	      on VxWorks, see gotoff_operand.  */
	   || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
#endif

      /* For x64 PE-COFF there is no GOT table,
	 so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  /* 64-bit small/medium PIC: load the address through a
	     @GOTPCREL slot.  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
	}
      else
	{
	  /* This symbol must be referenced via a load
	     from the Global Offset Table (@GOT).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (TARGET_64BIT)
	    new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

	  if (reg != 0)
	    {
	      gcc_assert (REG_P (reg));
	      new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					     new_rtx, reg, 1, OPTAB_DIRECT);
	    }
	  else
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
	}

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	/* A constant that is not a valid immediate must be loaded
	   into a register.  */
	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant
	     offset from a @GOTOFF symbol reference.  */
	  if (!TARGET_PECOFF
	      && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

		  if (reg != 0)
		    {
		      gcc_assert (REG_P (reg));
		      new_rtx = expand_simple_binop (Pmode, PLUS,
						     pic_offset_table_rtx,
						     new_rtx, reg, 1,
						     OPTAB_DIRECT);
		    }
		  else
		    new_rtx
		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		}
	      else
		{
		  /* Offsets outside the +-16M range do not fold into
		     the addressing mode; force operands into
		     registers.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);

		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize both PLUS operands recursively, then
	         recombine the results.  */
	      rtx base = legitimize_pic_address (orig: op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (orig: op1, reg: base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);

		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  /* For %rip addressing, we have to use
		     just disp32, not base nor index.  */
		  if (TARGET_64BIT
		      && (SYMBOL_REF_P (base)
			  || LABEL_REF_P (base)))
		    base = force_reg (mode, base);
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      /* Re-associate so the constant stays outermost.  */
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
| 12431 | |
| 12432 | /* Load the thread pointer. If TO_REG is true, force it into a register. */ |
| 12433 | |
| 12434 | static rtx |
| 12435 | get_thread_pointer (machine_mode tp_mode, bool to_reg) |
| 12436 | { |
| 12437 | rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); |
| 12438 | |
| 12439 | if (GET_MODE (tp) != tp_mode) |
| 12440 | { |
| 12441 | gcc_assert (GET_MODE (tp) == SImode); |
| 12442 | gcc_assert (tp_mode == DImode); |
| 12443 | |
| 12444 | tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); |
| 12445 | } |
| 12446 | |
| 12447 | if (to_reg) |
| 12448 | tp = copy_to_mode_reg (tp_mode, tp); |
| 12449 | |
| 12450 | return tp; |
| 12451 | } |
| 12452 | |
| 12453 | /* Construct the SYMBOL_REF for the _tls_index symbol. */ |
| 12454 | |
| 12455 | static GTY(()) rtx ix86_tls_index_symbol; |
| 12456 | |
| 12457 | static rtx |
| 12458 | ix86_tls_index (void) |
| 12459 | { |
| 12460 | if (!ix86_tls_index_symbol) |
| 12461 | ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index" ); |
| 12462 | |
| 12463 | if (flag_pic) |
| 12464 | return gen_rtx_CONST (Pmode, |
| 12465 | gen_rtx_UNSPEC (Pmode, |
| 12466 | gen_rtvec (1, ix86_tls_index_symbol), |
| 12467 | UNSPEC_PCREL)); |
| 12468 | else |
| 12469 | return ix86_tls_index_symbol; |
| 12470 | } |
| 12471 | |
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

rtx
ix86_tls_get_addr (void)
{
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    {
      /* __tls_get_addr doesn't preserve vector registers.  When a
	 function with no_caller_saved_registers attribute calls
	 __tls_get_addr, YMM and ZMM registers will be clobbered.
	 Issue an error and suggest -mtls-dialect=gnu2 in this case.  */
      if (cfun->machine->func_type == TYPE_NORMAL)
	error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
		  " with the %<no_caller_saved_registers%> attribute"));
      else
	error (cfun->machine->func_type == TYPE_EXCEPTION
	       ? G_("%<-mtls-dialect=gnu2%> must be used with an"
		    " exception service routine")
	       : G_("%<-mtls-dialect=gnu2%> must be used with an"
		    " interrupt service routine"));
      /* Don't issue the same error twice.  */
      cfun->machine->func_type = TYPE_NORMAL;
      cfun->machine->call_saved_registers
	= TYPE_DEFAULT_CALL_SAVED_REGISTERS;
    }

  if (!ix86_tls_symbol)
    {
      /* 32-bit GNU TLS uses the ___tls_get_addr name (extra leading
	 underscore); otherwise __tls_get_addr.  */
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      /* In the large PIC model the callee is addressed @PLTOFF
	 relative to the PIC register.  */
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
				   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			   gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}
| 12520 | |
| 12521 | /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ |
| 12522 | |
| 12523 | static GTY(()) rtx ix86_tls_module_base_symbol; |
| 12524 | |
| 12525 | rtx |
| 12526 | ix86_tls_module_base (void) |
| 12527 | { |
| 12528 | if (!ix86_tls_module_base_symbol) |
| 12529 | { |
| 12530 | ix86_tls_module_base_symbol |
| 12531 | = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_" ); |
| 12532 | |
| 12533 | SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) |
| 12534 | |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; |
| 12535 | } |
| 12536 | |
| 12537 | return ix86_tls_module_base_symbol; |
| 12538 | } |
| 12539 | |
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.

   X is the TLS symbol and MODEL the TLS access model to use; returns
   an RTX computing the address of X under MODEL, possibly emitting
   insns in the process.  */

rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Windows implements a single form of TLS.  */
  if (TARGET_WIN32_TLS)
    {
      /* Load the 32-bit index.  */
      rtx ind = gen_const_mem (SImode, ix86_tls_index ());
      set_mem_alias_set (ind, GOT_ALIAS_SET);
      if (TARGET_64BIT)
	ind = convert_to_mode (Pmode, ind, 1);
      ind = force_reg (Pmode, ind);

      /* Add it to the thread pointer and load the base.  */
      tp = get_thread_pointer (Pmode, to_reg: true);
      rtx addr = gen_rtx_PLUS (Pmode, tp,
			       gen_rtx_MULT (Pmode, ind,
					     GEN_INT (UNITS_PER_WORD)));
      base = gen_const_mem (Pmode, addr);
      set_mem_alias_set (base, GOT_ALIAS_SET);

      /* Add the 32-bit section-relative offset to the base.  */
      base = force_reg (Pmode, base);
      off = gen_rtx_CONST (Pmode,
			   gen_rtx_UNSPEC (SImode,
					   gen_rtvec (1, x),
					   UNSPEC_SECREL32));
      return gen_rtx_PLUS (Pmode, base, off);
    }

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* 32-bit mode needs a GOT pointer for the resolver call; reuse
	 the PIC register when available, otherwise materialize one.  */
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 dialect: TLS descriptor sequence; the result is
	     thread pointer + descriptor value.  */
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (arg0: ptr_mode, x0: dest, x1: x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (tp_mode: ptr_mode, to_reg: true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	    dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx rdi = gen_rtx_REG (Pmode, DI_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, x0: rax, x1: x, x2: caddr, x3: rdi));
	      insns = end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      /* Mark the sequence as a const call so equivalent
		 accesses can be shared.  */
	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 dialect: resolve _TLS_MODULE_BASE_ via a TLS
	     descriptor to obtain the module base.  */
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (arg0: ptr_mode, x0: base, x1: tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (tp_mode: ptr_mode, to_reg: true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx rdi = gen_rtx_REG (Pmode, DI_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, x0: rax, x1: caddr, x2: rdi));
	      insns = end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      /* Add the symbol's offset within its module (@DTPOFF).  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (outermode: ptr_mode, op: dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      /* Load the thread-pointer offset from the GOT slot.  */
      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, GOT_ALIAS_SET);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     to_reg: for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  base = get_thread_pointer (Pmode, to_reg: true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     to_reg: for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, to_reg: true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
| 12802 | |
| 12803 | /* Return true if the TLS address requires insn using integer registers. |
| 12804 | It's used to prevent KMOV/VMOV in TLS code sequences which require integer |
| 12805 | MOV instructions, refer to PR103275. */ |
| 12806 | bool |
| 12807 | ix86_gpr_tls_address_pattern_p (rtx mem) |
| 12808 | { |
| 12809 | gcc_assert (MEM_P (mem)); |
| 12810 | |
| 12811 | rtx addr = XEXP (mem, 0); |
| 12812 | subrtx_var_iterator::array_type array; |
| 12813 | FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL) |
| 12814 | { |
| 12815 | rtx op = *iter; |
| 12816 | if (GET_CODE (op) == UNSPEC) |
| 12817 | switch (XINT (op, 1)) |
| 12818 | { |
| 12819 | case UNSPEC_GOTNTPOFF: |
| 12820 | return true; |
| 12821 | case UNSPEC_TPOFF: |
| 12822 | if (!TARGET_64BIT) |
| 12823 | return true; |
| 12824 | break; |
| 12825 | default: |
| 12826 | break; |
| 12827 | } |
| 12828 | } |
| 12829 | |
| 12830 | return false; |
| 12831 | } |
| 12832 | |
| 12833 | /* Return true if OP refers to a TLS address. */ |
| 12834 | bool |
| 12835 | ix86_tls_address_pattern_p (rtx op) |
| 12836 | { |
| 12837 | subrtx_var_iterator::array_type array; |
| 12838 | FOR_EACH_SUBRTX_VAR (iter, array, op, ALL) |
| 12839 | { |
| 12840 | rtx op = *iter; |
| 12841 | if (MEM_P (op)) |
| 12842 | { |
| 12843 | rtx *x = &XEXP (op, 0); |
| 12844 | while (GET_CODE (*x) == PLUS) |
| 12845 | { |
| 12846 | int i; |
| 12847 | for (i = 0; i < 2; i++) |
| 12848 | { |
| 12849 | rtx u = XEXP (*x, i); |
| 12850 | if (GET_CODE (u) == ZERO_EXTEND) |
| 12851 | u = XEXP (u, 0); |
| 12852 | if (GET_CODE (u) == UNSPEC |
| 12853 | && XINT (u, 1) == UNSPEC_TP) |
| 12854 | return true; |
| 12855 | } |
| 12856 | x = &XEXP (*x, 0); |
| 12857 | } |
| 12858 | |
| 12859 | iter.skip_subrtxes (); |
| 12860 | } |
| 12861 | } |
| 12862 | |
| 12863 | return false; |
| 12864 | } |
| 12865 | |
| 12866 | /* Rewrite *LOC so that it refers to a default TLS address space. */ |
| 12867 | static void |
| 12868 | ix86_rewrite_tls_address_1 (rtx *loc) |
| 12869 | { |
| 12870 | subrtx_ptr_iterator::array_type array; |
| 12871 | FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) |
| 12872 | { |
| 12873 | rtx *loc = *iter; |
| 12874 | if (MEM_P (*loc)) |
| 12875 | { |
| 12876 | rtx addr = XEXP (*loc, 0); |
| 12877 | rtx *x = &addr; |
| 12878 | while (GET_CODE (*x) == PLUS) |
| 12879 | { |
| 12880 | int i; |
| 12881 | for (i = 0; i < 2; i++) |
| 12882 | { |
| 12883 | rtx u = XEXP (*x, i); |
| 12884 | if (GET_CODE (u) == ZERO_EXTEND) |
| 12885 | u = XEXP (u, 0); |
| 12886 | if (GET_CODE (u) == UNSPEC |
| 12887 | && XINT (u, 1) == UNSPEC_TP) |
| 12888 | { |
| 12889 | /* NB: Since address override only applies to the |
| 12890 | (reg32) part in fs:(reg32), return if address |
| 12891 | override is used. */ |
| 12892 | if (Pmode != word_mode |
| 12893 | && REG_P (XEXP (*x, 1 - i))) |
| 12894 | return; |
| 12895 | |
| 12896 | addr_space_t as = DEFAULT_TLS_SEG_REG; |
| 12897 | |
| 12898 | *x = XEXP (*x, 1 - i); |
| 12899 | |
| 12900 | *loc = replace_equiv_address_nv (*loc, addr, true); |
| 12901 | set_mem_addr_space (*loc, as); |
| 12902 | return; |
| 12903 | } |
| 12904 | } |
| 12905 | x = &XEXP (*x, 0); |
| 12906 | } |
| 12907 | |
| 12908 | iter.skip_subrtxes (); |
| 12909 | } |
| 12910 | } |
| 12911 | } |
| 12912 | |
| 12913 | /* Rewrite instruction pattern involvning TLS address |
| 12914 | so that it refers to a default TLS address space. */ |
| 12915 | rtx |
| 12916 | ix86_rewrite_tls_address (rtx pattern) |
| 12917 | { |
| 12918 | pattern = copy_insn (pattern); |
| 12919 | ix86_rewrite_tls_address_1 (loc: &pattern); |
| 12920 | return pattern; |
| 12921 | } |
| 12922 | |
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.cc for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  /* A bare TLS SYMBOL_REF: expand the model-specific access sequence.  */
  log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, model: (enum tls_model) log, for_mov: false);
  /* (const (plus (tls_symbol) (const_int))): legitimize the symbol and
     re-add the constant displacement afterwards.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      model: (enum tls_model) log, for_mov: false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  /* PE-COFF dllimport symbols need an indirection through the import
     table.  */
  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
#endif
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (orig: x, reg: 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply, since only
     scale factors of 1, 2, 4, 8 fit the addressing modes.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Find which of the two constants is the CONST_INT so the two
	     can be combined with plus_constant below.  */
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
	return x;

      /* A MULT can't be an index with another MULT present; force it
	 into a register.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      /* (plus (reg) (reg)) is always a legitimate base+index form.  */
      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (orig: x, reg: 0);
	}

      if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
	return x;

      /* Last resort: force the non-register operand into a register,
	 converting to Pmode if necessary.  */
      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
| 13129 | |
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." refers to the current output location, only meaningful
	 when generating PIC.  */
      gcc_assert (flag_pic);
      putc (c: '.', stream: file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      /* 'P' print code: emit a call through the PLT where needed.  */
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ix86_call_use_plt_p (x))
	fputs (s: "@PLT" , stream: file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L" , CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    CASE_CONST_SCALAR_INT:
      output_addr_const (file, x);
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused" );
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc (c: '+', stream: file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc (c: '+', stream: file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      /* Wrap the difference in dialect-specific grouping brackets,
	 except on Darwin whose assembler doesn't accept them.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', stream: file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc (c: '-', stream: file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', stream: file);
      break;

    case UNSPEC:
      /* PIC/TLS relocations: print the operand followed by the
	 relocation suffix the assembler expects.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs (s: "@GOT" , stream: file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs (s: "@GOTOFF" , stream: file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs (s: "@PLTOFF" , stream: file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]" , stream: file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]" , stream: file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs (s: "@gottpoff" , stream: file);
	  break;
	case UNSPEC_TPOFF:
	  fputs (s: "@tpoff" , stream: file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs (s: "@tpoff" , stream: file);
	  else
	    fputs (s: "@ntpoff" , stream: file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs (s: "@dtpoff" , stream: file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)" : "@gottpoff[rip]" , stream: file);
	  else
	    fputs (s: "@gotntpoff" , stream: file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs (s: "@indntpoff" , stream: file);
	  break;
	case UNSPEC_SECREL32:
	  fputs (s: "@secrel32" , stream: file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand" );
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand" );
    }
}
| 13289 | |
| 13290 | /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. |
| 13291 | We need to emit DTP-relative relocations. */ |
| 13292 | |
| 13293 | static void ATTRIBUTE_UNUSED |
| 13294 | i386_output_dwarf_dtprel (FILE *file, int size, rtx x) |
| 13295 | { |
| 13296 | fputs (ASM_LONG, stream: file); |
| 13297 | output_addr_const (file, x); |
| 13298 | #if TARGET_WIN32_TLS |
| 13299 | fputs ("@secrel32" , file); |
| 13300 | #else |
| 13301 | fputs (s: "@dtpoff" , stream: file); |
| 13302 | #endif |
| 13303 | switch (size) |
| 13304 | { |
| 13305 | case 4: |
| 13306 | break; |
| 13307 | case 8: |
| 13308 | fputs (s: ", 0" , stream: file); |
| 13309 | break; |
| 13310 | default: |
| 13311 | gcc_unreachable (); |
| 13312 | } |
| 13313 | } |
| 13314 | |
| 13315 | /* Return true if X is a representation of the PIC register. This copes |
| 13316 | with calls from ix86_find_base_term, where the register might have |
| 13317 | been replaced by a cselib value. */ |
| 13318 | |
| 13319 | static bool |
| 13320 | ix86_pic_register_p (rtx x) |
| 13321 | { |
| 13322 | if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) |
| 13323 | return (pic_offset_table_rtx |
| 13324 | && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); |
| 13325 | else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT) |
| 13326 | return true; |
| 13327 | else if (!REG_P (x)) |
| 13328 | return false; |
| 13329 | else if (pic_offset_table_rtx) |
| 13330 | { |
| 13331 | if (REGNO (x) == REGNO (pic_offset_table_rtx)) |
| 13332 | return true; |
| 13333 | if (HARD_REGISTER_P (x) |
| 13334 | && !HARD_REGISTER_P (pic_offset_table_rtx) |
| 13335 | && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) |
| 13336 | return true; |
| 13337 | return false; |
| 13338 | } |
| 13339 | else |
| 13340 | return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; |
| 13341 | } |
| 13342 | |
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses, i.e. turn a
   %fs/%gs-segmented address containing an @tpoff displacement back
   into symbol [+ offset] [+ base/index] form.  Returns ORIG_X
   unchanged if the pattern doesn't match.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  /* Local-exec via segment references only exists with
     TARGET_TLS_DIRECT_SEG_REFS.  */
  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  /* Must decompose as a TLS-segment address with a CONST displacement.  */
  if (ix86_decompose_address (addr: x, out: &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* Look through an added constant offset around the UNSPEC.  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (SYMBOL_REF_P (x));
  /* Re-apply the constant offset that was skipped above, if any.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  /* Rebuild the index*scale and base terms around the symbol.  */
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  /* Preserve the MEM wrapper (and its attributes) if ORIG_X was one.  */
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
| 13385 | |
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.

   This helper is used in the normal ix86_delegitimize_address
   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As compile time memory optimization, we
   avoid allocating rtxes that will not change anything on the outcome
   of the callers (find_base_value and find_base_term).  */

static inline rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* (const (plus (unspec pcrel) (const_int))): rebuild as
	 offset + symbol.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base.  A CONST can't be arg_pointer_rtx based.  */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      /* (const (unspec gotpcrel/pcrel)): strip down to the symbol.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  /* The symbol's mode may differ from the original MEM's;
	     take the matching lowpart if possible.  */
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), op: x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  /* The remaining patterns all look like something + (const ...).  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  /* Neither term is the PIC register; treat the whole PLUS
	     as an opaque addend.  */
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  /* Peel a constant offset inside the CONST, leaving the UNSPEC.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (disp: x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* Reassemble the delegitimized address from the pieces collected
     above: constant offset, register term, and opaque addend.  */
  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed.  Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_VAROFF)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  /* Match the original MEM's mode via a lowpart subreg if needed.  */
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), op: result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
| 13553 | |
| 13554 | /* The normal instantiation of the above template. */ |
| 13555 | |
| 13556 | static rtx |
| 13557 | ix86_delegitimize_address (rtx x) |
| 13558 | { |
| 13559 | return ix86_delegitimize_address_1 (x, base_term_p: false); |
| 13560 | } |
| 13561 | |
| 13562 | /* If X is a machine specific address (i.e. a symbol or label being |
| 13563 | referenced as a displacement from the GOT implemented using an |
| 13564 | UNSPEC), then return the base term. Otherwise return X. */ |
| 13565 | |
| 13566 | rtx |
| 13567 | ix86_find_base_term (rtx x) |
| 13568 | { |
| 13569 | rtx term; |
| 13570 | |
| 13571 | if (TARGET_64BIT) |
| 13572 | { |
| 13573 | if (GET_CODE (x) != CONST) |
| 13574 | return x; |
| 13575 | term = XEXP (x, 0); |
| 13576 | if (GET_CODE (term) == PLUS |
| 13577 | && CONST_INT_P (XEXP (term, 1))) |
| 13578 | term = XEXP (term, 0); |
| 13579 | if (GET_CODE (term) != UNSPEC |
| 13580 | || (XINT (term, 1) != UNSPEC_GOTPCREL |
| 13581 | && XINT (term, 1) != UNSPEC_PCREL)) |
| 13582 | return x; |
| 13583 | |
| 13584 | return XVECEXP (term, 0, 0); |
| 13585 | } |
| 13586 | |
| 13587 | return ix86_delegitimize_address_1 (x, base_term_p: true); |
| 13588 | } |
| 13589 | |
| 13590 | /* Return true if X shouldn't be emitted into the debug info. |
| 13591 | Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_ |
| 13592 | symbol easily into the .debug_info section, so we need not to |
| 13593 | delegitimize, but instead assemble as @gotoff. |
| 13594 | Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically |
| 13595 | assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */ |
| 13596 | |
| 13597 | static bool |
| 13598 | ix86_const_not_ok_for_debug_p (rtx x) |
| 13599 | { |
| 13600 | if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF) |
| 13601 | return true; |
| 13602 | |
| 13603 | if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0) |
| 13604 | return true; |
| 13605 | |
| 13606 | return false; |
| 13607 | } |
| 13608 | |
/* Print to FILE the one- or two-letter condition-code suffix (as used
   by jCC/setCC/cmovCC) for comparison CODE evaluated in flags mode
   MODE.  If REVERSE, print the suffix for the reversed condition.
   FP selects the floating-point (fcmov/SSE) spelling where the same
   predicate has two assembler names.  */

static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  /* Fold FP comparisons onto the equivalent integer condition first,
     so only integer CC modes need handling below.  */
  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      /* In the single-flag CC modes, EQ tests that flag directly
	 (carry, overflow, ...) rather than ZF.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "a" ;
	  break;
	case E_CCCmode:
	  suffix = "c" ;
	  break;
	case E_CCOmode:
	  suffix = "o" ;
	  break;
	case E_CCPmode:
	  suffix = "p" ;
	  break;
	case E_CCSmode:
	  suffix = "s" ;
	  break;
	default:
	  suffix = "e" ;
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      /* Negated forms of the single-flag tests above.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "na" ;
	  break;
	case E_CCCmode:
	  suffix = "nc" ;
	  break;
	case E_CCOmode:
	  suffix = "no" ;
	  break;
	case E_CCPmode:
	  suffix = "np" ;
	  break;
	case E_CCSmode:
	  suffix = "ns" ;
	  break;
	default:
	  suffix = "ne" ;
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g" ;
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a" ;
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  /* Overflow is known clear, so LT reduces to the sign flag.  */
	  suffix = "s" ;
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l" ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b" ;
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c" ;
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  /* Counterpart of the LT/"s" case: GE is just "sign clear".  */
	  suffix = "ns" ;
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge" ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb" ;
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc" ;
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le" ;
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be" ;
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p" ;
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np" ;
      break;
    default:
      gcc_unreachable ();
    }
  fputs (s: suffix, stream: file);
}
| 13758 | |
| 13759 | /* Print the name of register X to FILE based on its machine mode and number. |
| 13760 | If CODE is 'w', pretend the mode is HImode. |
| 13761 | If CODE is 'b', pretend the mode is QImode. |
| 13762 | If CODE is 'k', pretend the mode is SImode. |
| 13763 | If CODE is 'q', pretend the mode is DImode. |
| 13764 | If CODE is 'x', pretend the mode is V4SFmode. |
| 13765 | If CODE is 't', pretend the mode is V8SFmode. |
| 13766 | If CODE is 'g', pretend the mode is V16SFmode. |
| 13767 | If CODE is 'h', pretend the reg is the 'high' byte register. |
| 13768 | If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. |
| 13769 | If CODE is 'd', duplicate the operand for AVX instruction. |
| 13770 | If CODE is 'V', print naked full integer register name without %. |
| 13771 | */ |
| 13772 | |
void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;			/* Size in bytes the name should imply.  */
  unsigned int regno;
  bool duplicated;		/* Print the operand twice ('d' for AVX).  */

  /* AT&T syntax prefixes registers with '%'; 'V' asks for the naked
     name instead.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc (c: '%', stream: file);

  /* pc_rtx stands for the instruction pointer, only nameable in
     64-bit mode (RIP-relative addressing).  */
  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs (s: "rip" , stream: file);
      return;
    }

  /* 'y': spell the x87 stack top as "st(0)" rather than "st".  */
  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs (s: "st(0)" , stream: file);
      return;
    }

  /* Derive the nominal operand size from the modifier letter, falling
     back to the mode of X.  msize == 0 flags the QI "high" registers
     (ah/bh/ch/dh).  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* Registers that never survive to assembler output: the argument
     and frame pointers should have been eliminated by now, and FPSR
     has no assembler name.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'" , reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output" );
      return;
    }

  /* 'V' prints the full-width integer register name regardless of the
     operand's mode.  */
  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register" );
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register" );
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers get the 'e' (32-bit) or 'r' (64-bit)
	 prefix; hi_reg_name holds the bare 16-bit name.  */
      if (LEGACY_INT_REGNO_P (regno))
	putc (c: msize > 4 && TARGET_64BIT ? 'r' : 'e', stream: file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register" );
      reg = qi_reg_name[regno];
      break;
    case 0:
      /* QI "high" byte registers (ah/bh/ch/dh).  */
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      /* ymm/zmm: print 'y' or 'z' and then the "xmmN" name with its
	 leading 'x' skipped.  */
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (c: msize == 32 ? 'y' : 'z', stream: file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (s: reg, stream: file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]" */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      /* hi_reg_name already printed "rN"; append the size suffix.  */
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves" );
	  break;
	case 1:
	  putc (c: 'b', stream: file);
	  break;
	case 2:
	  putc (c: 'w', stream: file);
	  break;
	case 4:
	  putc (c: 'd', stream: file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register" );
	  break;
	}
      return;
    }

  /* 'd' with AVX: repeat the register as a second source operand.  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (stream: file, format: ", %%%s" , reg);
      else
	fprintf (stream: file, format: ", %s" , reg);
    }
}
| 13923 | |
| 13924 | /* Meaning of CODE: |
| 13925 | L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. |
| 13926 | C -- print opcode suffix for set/cmov insn. |
| 13927 | c -- like C, but print reversed condition |
| 13928 | F,f -- likewise, but for floating-point. |
| 13929 | O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", |
| 13930 | otherwise nothing |
| 13931 | R -- print embedded rounding and sae. |
| 13932 | r -- print only sae. |
| 13933 | z -- print the opcode suffix for the size of the current operand. |
| 13934 | Z -- likewise, with special suffixes for x87 instructions. |
| 13935 | * -- print a star (in certain assembler syntax) |
| 13936 | A -- print an absolute memory reference. |
| 13937 | E -- print address with DImode register names if TARGET_64BIT. |
| 13938 | w -- print the operand as if it's a "word" (HImode) even if it isn't. |
| 13939 | s -- print a shift double count, followed by the assemblers argument |
| 13940 | delimiter. |
| 13941 | b -- print the QImode name of the register for the indicated operand. |
| 13942 | %b0 would print %al if operands[0] is reg 0. |
| 13943 | w -- likewise, print the HImode name of the register. |
| 13944 | k -- likewise, print the SImode name of the register. |
| 13945 | q -- likewise, print the DImode name of the register. |
| 13946 | x -- likewise, print the V4SFmode name of the register. |
| 13947 | t -- likewise, print the V8SFmode name of the register. |
| 13948 | g -- likewise, print the V16SFmode name of the register. |
| 13949 | h -- print the QImode name for a "high" register, either ah, bh, ch or dh. |
| 13950 | y -- print "st(0)" instead of "st" as a register. |
| 13951 | d -- print duplicated register operand for AVX instruction. |
| 13952 | D -- print condition for SSE cmp instruction. |
| 13953 | P -- if PIC, print an @PLT suffix. For -fno-plt, load function |
| 13954 | address from GOT. |
| 13955 | p -- print raw symbol name. |
| 13956 | X -- don't print any sort of PIC '@' suffix for a symbol. |
| 13957 | & -- print some in-use local-dynamic symbol name. |
| 13958 | H -- print a memory address offset by 8; used for sse high-parts |
| 13959 | Y -- print condition for XOP pcom* instruction. |
| 13960 | V -- print naked full integer register name without %. |
| 13961 | v -- print segment override prefix |
| 13962 | + -- print a branch hint as 'cs' or 'ds' prefix |
| 13963 | ; -- print a semicolon (after prefixes due to bug in older gas). |
| 13964 | ~ -- print "i" if TARGET_AVX2, "f" otherwise. |
| 13965 | ^ -- print addr32 prefix if Pmode != word_mode |
| 13966 | M -- print addr32 prefix for TARGET_X32 with VSIB address. |
| 13967 | ! -- print NOTRACK prefix for jxx/call/ret instructions if required. |
| 13968 | N -- print maskz if it's constant 0 operand. |
| 13969 | G -- print embedded flag for ccmp/ctest. |
| 13970 | */ |
| 13971 | |
| 13972 | void |
| 13973 | ix86_print_operand (FILE *file, rtx x, int code) |
| 13974 | { |
| 13975 | if (code) |
| 13976 | { |
| 13977 | switch (code) |
| 13978 | { |
| 13979 | case 'A': |
| 13980 | switch (ASSEMBLER_DIALECT) |
| 13981 | { |
| 13982 | case ASM_ATT: |
| 13983 | putc (c: '*', stream: file); |
| 13984 | break; |
| 13985 | |
| 13986 | case ASM_INTEL: |
| 13987 | /* Intel syntax. For absolute addresses, registers should not |
| 13988 | be surrounded by braces. */ |
| 13989 | if (!REG_P (x)) |
| 13990 | { |
| 13991 | putc (c: '[', stream: file); |
| 13992 | ix86_print_operand (file, x, code: 0); |
| 13993 | putc (c: ']', stream: file); |
| 13994 | return; |
| 13995 | } |
| 13996 | break; |
| 13997 | |
| 13998 | default: |
| 13999 | gcc_unreachable (); |
| 14000 | } |
| 14001 | |
| 14002 | ix86_print_operand (file, x, code: 0); |
| 14003 | return; |
| 14004 | |
| 14005 | case 'E': |
| 14006 | /* Wrap address in an UNSPEC to declare special handling. */ |
| 14007 | if (TARGET_64BIT) |
| 14008 | x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); |
| 14009 | |
| 14010 | output_address (VOIDmode, x); |
| 14011 | return; |
| 14012 | |
| 14013 | case 'L': |
| 14014 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14015 | putc (c: 'l', stream: file); |
| 14016 | return; |
| 14017 | |
| 14018 | case 'W': |
| 14019 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14020 | putc (c: 'w', stream: file); |
| 14021 | return; |
| 14022 | |
| 14023 | case 'B': |
| 14024 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14025 | putc (c: 'b', stream: file); |
| 14026 | return; |
| 14027 | |
| 14028 | case 'Q': |
| 14029 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14030 | putc (c: 'l', stream: file); |
| 14031 | return; |
| 14032 | |
| 14033 | case 'S': |
| 14034 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14035 | putc (c: 's', stream: file); |
| 14036 | return; |
| 14037 | |
| 14038 | case 'T': |
| 14039 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14040 | putc (c: 't', stream: file); |
| 14041 | return; |
| 14042 | |
| 14043 | case 'O': |
| 14044 | #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
| 14045 | if (ASSEMBLER_DIALECT != ASM_ATT) |
| 14046 | return; |
| 14047 | |
| 14048 | switch (GET_MODE_SIZE (GET_MODE (x))) |
| 14049 | { |
| 14050 | case 2: |
| 14051 | putc ('w', file); |
| 14052 | break; |
| 14053 | |
| 14054 | case 4: |
| 14055 | putc ('l', file); |
| 14056 | break; |
| 14057 | |
| 14058 | case 8: |
| 14059 | putc ('q', file); |
| 14060 | break; |
| 14061 | |
| 14062 | default: |
| 14063 | output_operand_lossage ("invalid operand size for operand " |
| 14064 | "code 'O'" ); |
| 14065 | return; |
| 14066 | } |
| 14067 | |
| 14068 | putc ('.', file); |
| 14069 | #endif |
| 14070 | return; |
| 14071 | |
| 14072 | case 'z': |
| 14073 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
| 14074 | { |
| 14075 | /* Opcodes don't get size suffixes if using Intel opcodes. */ |
| 14076 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
| 14077 | return; |
| 14078 | |
| 14079 | switch (GET_MODE_SIZE (GET_MODE (x))) |
| 14080 | { |
| 14081 | case 1: |
| 14082 | putc (c: 'b', stream: file); |
| 14083 | return; |
| 14084 | |
| 14085 | case 2: |
| 14086 | putc (c: 'w', stream: file); |
| 14087 | return; |
| 14088 | |
| 14089 | case 4: |
| 14090 | putc (c: 'l', stream: file); |
| 14091 | return; |
| 14092 | |
| 14093 | case 8: |
| 14094 | putc (c: 'q', stream: file); |
| 14095 | return; |
| 14096 | |
| 14097 | default: |
| 14098 | output_operand_lossage ("invalid operand size for operand " |
| 14099 | "code 'z'" ); |
| 14100 | return; |
| 14101 | } |
| 14102 | } |
| 14103 | |
| 14104 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
| 14105 | { |
| 14106 | if (this_is_asm_operands) |
| 14107 | warning_for_asm (this_is_asm_operands, |
| 14108 | "non-integer operand used with operand code %<z%>" ); |
| 14109 | else |
| 14110 | warning (0, "non-integer operand used with operand code %<z%>" ); |
| 14111 | } |
| 14112 | /* FALLTHRU */ |
| 14113 | |
| 14114 | case 'Z': |
| 14115 | /* 387 opcodes don't get size suffixes if using Intel opcodes. */ |
| 14116 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
| 14117 | return; |
| 14118 | |
| 14119 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
| 14120 | { |
| 14121 | switch (GET_MODE_SIZE (GET_MODE (x))) |
| 14122 | { |
| 14123 | case 2: |
| 14124 | #ifdef HAVE_AS_IX86_FILDS |
| 14125 | putc (c: 's', stream: file); |
| 14126 | #endif |
| 14127 | return; |
| 14128 | |
| 14129 | case 4: |
| 14130 | putc (c: 'l', stream: file); |
| 14131 | return; |
| 14132 | |
| 14133 | case 8: |
| 14134 | #ifdef HAVE_AS_IX86_FILDQ |
| 14135 | putc (c: 'q', stream: file); |
| 14136 | #else |
| 14137 | fputs ("ll" , file); |
| 14138 | #endif |
| 14139 | return; |
| 14140 | |
| 14141 | default: |
| 14142 | break; |
| 14143 | } |
| 14144 | } |
| 14145 | else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
| 14146 | { |
| 14147 | /* 387 opcodes don't get size suffixes |
| 14148 | if the operands are registers. */ |
| 14149 | if (STACK_REG_P (x)) |
| 14150 | return; |
| 14151 | |
| 14152 | switch (GET_MODE_SIZE (GET_MODE (x))) |
| 14153 | { |
| 14154 | case 4: |
| 14155 | putc (c: 's', stream: file); |
| 14156 | return; |
| 14157 | |
| 14158 | case 8: |
| 14159 | putc (c: 'l', stream: file); |
| 14160 | return; |
| 14161 | |
| 14162 | case 12: |
| 14163 | case 16: |
| 14164 | putc (c: 't', stream: file); |
| 14165 | return; |
| 14166 | |
| 14167 | default: |
| 14168 | break; |
| 14169 | } |
| 14170 | } |
| 14171 | else |
| 14172 | { |
| 14173 | output_operand_lossage ("invalid operand type used with " |
| 14174 | "operand code '%c'" , code); |
| 14175 | return; |
| 14176 | } |
| 14177 | |
| 14178 | output_operand_lossage ("invalid operand size for operand code '%c'" , |
| 14179 | code); |
| 14180 | return; |
| 14181 | |
| 14182 | case 'd': |
| 14183 | case 'b': |
| 14184 | case 'w': |
| 14185 | case 'k': |
| 14186 | case 'q': |
| 14187 | case 'h': |
| 14188 | case 't': |
| 14189 | case 'g': |
| 14190 | case 'y': |
| 14191 | case 'x': |
| 14192 | case 'X': |
| 14193 | case 'P': |
| 14194 | case 'p': |
| 14195 | case 'V': |
| 14196 | break; |
| 14197 | |
| 14198 | case 's': |
| 14199 | if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) |
| 14200 | { |
| 14201 | ix86_print_operand (file, x, code: 0); |
| 14202 | fputs (s: ", " , stream: file); |
| 14203 | } |
| 14204 | return; |
| 14205 | |
| 14206 | case 'Y': |
| 14207 | switch (GET_CODE (x)) |
| 14208 | { |
| 14209 | case NE: |
| 14210 | fputs (s: "neq" , stream: file); |
| 14211 | break; |
| 14212 | case EQ: |
| 14213 | fputs (s: "eq" , stream: file); |
| 14214 | break; |
| 14215 | case GE: |
| 14216 | case GEU: |
| 14217 | fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt" , stream: file); |
| 14218 | break; |
| 14219 | case GT: |
| 14220 | case GTU: |
| 14221 | fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle" , stream: file); |
| 14222 | break; |
| 14223 | case LE: |
| 14224 | case LEU: |
| 14225 | fputs (s: "le" , stream: file); |
| 14226 | break; |
| 14227 | case LT: |
| 14228 | case LTU: |
| 14229 | fputs (s: "lt" , stream: file); |
| 14230 | break; |
| 14231 | case UNORDERED: |
| 14232 | fputs (s: "unord" , stream: file); |
| 14233 | break; |
| 14234 | case ORDERED: |
| 14235 | fputs (s: "ord" , stream: file); |
| 14236 | break; |
| 14237 | case UNEQ: |
| 14238 | fputs (s: "ueq" , stream: file); |
| 14239 | break; |
| 14240 | case UNGE: |
| 14241 | fputs (s: "nlt" , stream: file); |
| 14242 | break; |
| 14243 | case UNGT: |
| 14244 | fputs (s: "nle" , stream: file); |
| 14245 | break; |
| 14246 | case UNLE: |
| 14247 | fputs (s: "ule" , stream: file); |
| 14248 | break; |
| 14249 | case UNLT: |
| 14250 | fputs (s: "ult" , stream: file); |
| 14251 | break; |
| 14252 | case LTGT: |
| 14253 | fputs (s: "une" , stream: file); |
| 14254 | break; |
| 14255 | default: |
| 14256 | output_operand_lossage ("operand is not a condition code, " |
| 14257 | "invalid operand code 'Y'" ); |
| 14258 | return; |
| 14259 | } |
| 14260 | return; |
| 14261 | |
| 14262 | case 'D': |
| 14263 | /* Little bit of braindamage here. The SSE compare instructions |
| 14264 | does use completely different names for the comparisons that the |
| 14265 | fp conditional moves. */ |
| 14266 | switch (GET_CODE (x)) |
| 14267 | { |
| 14268 | case UNEQ: |
| 14269 | if (TARGET_AVX) |
| 14270 | { |
| 14271 | fputs (s: "eq_us" , stream: file); |
| 14272 | break; |
| 14273 | } |
| 14274 | /* FALLTHRU */ |
| 14275 | case EQ: |
| 14276 | fputs (s: "eq" , stream: file); |
| 14277 | break; |
| 14278 | case UNLT: |
| 14279 | if (TARGET_AVX) |
| 14280 | { |
| 14281 | fputs (s: "nge" , stream: file); |
| 14282 | break; |
| 14283 | } |
| 14284 | /* FALLTHRU */ |
| 14285 | case LT: |
| 14286 | fputs (s: "lt" , stream: file); |
| 14287 | break; |
| 14288 | case UNLE: |
| 14289 | if (TARGET_AVX) |
| 14290 | { |
| 14291 | fputs (s: "ngt" , stream: file); |
| 14292 | break; |
| 14293 | } |
| 14294 | /* FALLTHRU */ |
| 14295 | case LE: |
| 14296 | fputs (s: "le" , stream: file); |
| 14297 | break; |
| 14298 | case UNORDERED: |
| 14299 | fputs (s: "unord" , stream: file); |
| 14300 | break; |
| 14301 | case LTGT: |
| 14302 | if (TARGET_AVX) |
| 14303 | { |
| 14304 | fputs (s: "neq_oq" , stream: file); |
| 14305 | break; |
| 14306 | } |
| 14307 | /* FALLTHRU */ |
| 14308 | case NE: |
| 14309 | fputs (s: "neq" , stream: file); |
| 14310 | break; |
| 14311 | case GE: |
| 14312 | if (TARGET_AVX) |
| 14313 | { |
| 14314 | fputs (s: "ge" , stream: file); |
| 14315 | break; |
| 14316 | } |
| 14317 | /* FALLTHRU */ |
| 14318 | case UNGE: |
| 14319 | fputs (s: "nlt" , stream: file); |
| 14320 | break; |
| 14321 | case GT: |
| 14322 | if (TARGET_AVX) |
| 14323 | { |
| 14324 | fputs (s: "gt" , stream: file); |
| 14325 | break; |
| 14326 | } |
| 14327 | /* FALLTHRU */ |
| 14328 | case UNGT: |
| 14329 | fputs (s: "nle" , stream: file); |
| 14330 | break; |
| 14331 | case ORDERED: |
| 14332 | fputs (s: "ord" , stream: file); |
| 14333 | break; |
| 14334 | default: |
| 14335 | output_operand_lossage ("operand is not a condition code, " |
| 14336 | "invalid operand code 'D'" ); |
| 14337 | return; |
| 14338 | } |
| 14339 | return; |
| 14340 | |
| 14341 | case 'F': |
| 14342 | case 'f': |
| 14343 | #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
| 14344 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14345 | putc ('.', file); |
| 14346 | gcc_fallthrough (); |
| 14347 | #endif |
| 14348 | |
| 14349 | case 'C': |
| 14350 | case 'c': |
| 14351 | if (!COMPARISON_P (x)) |
| 14352 | { |
| 14353 | output_operand_lossage ("operand is not a condition code, " |
| 14354 | "invalid operand code '%c'" , code); |
| 14355 | return; |
| 14356 | } |
| 14357 | put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), |
| 14358 | reverse: code == 'c' || code == 'f', |
| 14359 | fp: code == 'F' || code == 'f', |
| 14360 | file); |
| 14361 | return; |
| 14362 | |
| 14363 | case 'G': |
| 14364 | { |
| 14365 | int dfv = INTVAL (x); |
| 14366 | const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv]; |
| 14367 | fputs (s: dfv_suffix, stream: file); |
| 14368 | } |
| 14369 | return; |
| 14370 | |
| 14371 | case 'H': |
| 14372 | if (!offsettable_memref_p (x)) |
| 14373 | { |
| 14374 | output_operand_lossage ("operand is not an offsettable memory " |
| 14375 | "reference, invalid operand code 'H'" ); |
| 14376 | return; |
| 14377 | } |
| 14378 | /* It doesn't actually matter what mode we use here, as we're |
| 14379 | only going to use this for printing. */ |
| 14380 | x = adjust_address_nv (x, DImode, 8); |
| 14381 | /* Output 'qword ptr' for intel assembler dialect. */ |
| 14382 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
| 14383 | code = 'q'; |
| 14384 | break; |
| 14385 | |
| 14386 | case 'K': |
| 14387 | if (!CONST_INT_P (x)) |
| 14388 | { |
| 14389 | output_operand_lossage ("operand is not an integer, invalid " |
| 14390 | "operand code 'K'" ); |
| 14391 | return; |
| 14392 | } |
| 14393 | |
| 14394 | if (INTVAL (x) & IX86_HLE_ACQUIRE) |
| 14395 | #ifdef HAVE_AS_IX86_HLE |
| 14396 | fputs (s: "xacquire " , stream: file); |
| 14397 | #else |
| 14398 | fputs ("\n" ASM_BYTE "0xf2\n\t" , file); |
| 14399 | #endif |
| 14400 | else if (INTVAL (x) & IX86_HLE_RELEASE) |
| 14401 | #ifdef HAVE_AS_IX86_HLE |
| 14402 | fputs (s: "xrelease " , stream: file); |
| 14403 | #else |
| 14404 | fputs ("\n" ASM_BYTE "0xf3\n\t" , file); |
| 14405 | #endif |
| 14406 | /* We do not want to print value of the operand. */ |
| 14407 | return; |
| 14408 | |
| 14409 | case 'N': |
| 14410 | if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) |
| 14411 | fputs (s: "{z}" , stream: file); |
| 14412 | return; |
| 14413 | |
| 14414 | case 'r': |
| 14415 | if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE) |
| 14416 | { |
| 14417 | output_operand_lossage ("operand is not a specific integer, " |
| 14418 | "invalid operand code 'r'" ); |
| 14419 | return; |
| 14420 | } |
| 14421 | |
| 14422 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
| 14423 | fputs (s: ", " , stream: file); |
| 14424 | |
| 14425 | fputs (s: "{sae}" , stream: file); |
| 14426 | |
| 14427 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14428 | fputs (s: ", " , stream: file); |
| 14429 | |
| 14430 | return; |
| 14431 | |
| 14432 | case 'R': |
| 14433 | if (!CONST_INT_P (x)) |
| 14434 | { |
| 14435 | output_operand_lossage ("operand is not an integer, invalid " |
| 14436 | "operand code 'R'" ); |
| 14437 | return; |
| 14438 | } |
| 14439 | |
| 14440 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
| 14441 | fputs (s: ", " , stream: file); |
| 14442 | |
| 14443 | switch (INTVAL (x)) |
| 14444 | { |
| 14445 | case ROUND_NEAREST_INT | ROUND_SAE: |
| 14446 | fputs (s: "{rn-sae}" , stream: file); |
| 14447 | break; |
| 14448 | case ROUND_NEG_INF | ROUND_SAE: |
| 14449 | fputs (s: "{rd-sae}" , stream: file); |
| 14450 | break; |
| 14451 | case ROUND_POS_INF | ROUND_SAE: |
| 14452 | fputs (s: "{ru-sae}" , stream: file); |
| 14453 | break; |
| 14454 | case ROUND_ZERO | ROUND_SAE: |
| 14455 | fputs (s: "{rz-sae}" , stream: file); |
| 14456 | break; |
| 14457 | default: |
| 14458 | output_operand_lossage ("operand is not a specific integer, " |
| 14459 | "invalid operand code 'R'" ); |
| 14460 | } |
| 14461 | |
| 14462 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14463 | fputs (s: ", " , stream: file); |
| 14464 | |
| 14465 | return; |
| 14466 | |
| 14467 | case 'v': |
| 14468 | if (MEM_P (x)) |
| 14469 | { |
| 14470 | switch (MEM_ADDR_SPACE (x)) |
| 14471 | { |
| 14472 | case ADDR_SPACE_GENERIC: |
| 14473 | break; |
| 14474 | case ADDR_SPACE_SEG_FS: |
| 14475 | fputs (s: "fs " , stream: file); |
| 14476 | break; |
| 14477 | case ADDR_SPACE_SEG_GS: |
| 14478 | fputs (s: "gs " , stream: file); |
| 14479 | break; |
| 14480 | default: |
| 14481 | gcc_unreachable (); |
| 14482 | } |
| 14483 | } |
| 14484 | else |
| 14485 | output_operand_lossage ("operand is not a memory reference, " |
| 14486 | "invalid operand code 'v'" ); |
| 14487 | return; |
| 14488 | |
| 14489 | case '*': |
| 14490 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14491 | putc (c: '*', stream: file); |
| 14492 | return; |
| 14493 | |
| 14494 | case '&': |
| 14495 | { |
| 14496 | const char *name = get_some_local_dynamic_name (); |
| 14497 | if (name == NULL) |
| 14498 | output_operand_lossage ("'%%&' used without any " |
| 14499 | "local dynamic TLS references" ); |
| 14500 | else |
| 14501 | assemble_name (file, name); |
| 14502 | return; |
| 14503 | } |
| 14504 | |
| 14505 | case '+': |
| 14506 | { |
| 14507 | rtx x; |
| 14508 | |
| 14509 | if (!optimize |
| 14510 | || optimize_function_for_size_p (cfun) |
| 14511 | || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN |
| 14512 | && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN)) |
| 14513 | return; |
| 14514 | |
| 14515 | x = find_reg_note (current_output_insn, REG_BR_PROB, 0); |
| 14516 | if (x) |
| 14517 | { |
| 14518 | int pred_val = profile_probability::from_reg_br_prob_note |
| 14519 | (XINT (x, 0)).to_reg_br_prob_base (); |
| 14520 | |
| 14521 | bool taken = pred_val > REG_BR_PROB_BASE / 2; |
| 14522 | /* We use 3e (DS) prefix for taken branches and |
| 14523 | 2e (CS) prefix for not taken branches. */ |
| 14524 | if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN) |
| 14525 | fputs (s: "ds ; " , stream: file); |
| 14526 | else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN) |
| 14527 | fputs (s: "cs ; " , stream: file); |
| 14528 | } |
| 14529 | return; |
| 14530 | } |
| 14531 | |
| 14532 | case ';': |
| 14533 | #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX |
| 14534 | putc (';', file); |
| 14535 | #endif |
| 14536 | return; |
| 14537 | |
| 14538 | case '~': |
| 14539 | putc (TARGET_AVX2 ? 'i' : 'f', stream: file); |
| 14540 | return; |
| 14541 | |
| 14542 | case 'M': |
| 14543 | if (TARGET_X32) |
| 14544 | { |
| 14545 | /* NB: 32-bit indices in VSIB address are sign-extended |
| 14546 | to 64 bits. In x32, if 32-bit address 0xf7fa3010 is |
| 14547 | sign-extended to 0xfffffffff7fa3010 which is invalid |
| 14548 | address. Add addr32 prefix if there is no base |
| 14549 | register nor symbol. */ |
| 14550 | bool ok; |
| 14551 | struct ix86_address parts; |
| 14552 | ok = ix86_decompose_address (addr: x, out: &parts); |
| 14553 | gcc_assert (ok && parts.index == NULL_RTX); |
| 14554 | if (parts.base == NULL_RTX |
| 14555 | && (parts.disp == NULL_RTX |
| 14556 | || !symbolic_operand (parts.disp, |
| 14557 | GET_MODE (parts.disp)))) |
| 14558 | fputs (s: "addr32 " , stream: file); |
| 14559 | } |
| 14560 | return; |
| 14561 | |
| 14562 | case '^': |
| 14563 | if (Pmode != word_mode) |
| 14564 | fputs (s: "addr32 " , stream: file); |
| 14565 | return; |
| 14566 | |
| 14567 | case '!': |
| 14568 | if (ix86_notrack_prefixed_insn_p (current_output_insn)) |
| 14569 | fputs (s: "notrack " , stream: file); |
| 14570 | return; |
| 14571 | |
| 14572 | default: |
| 14573 | output_operand_lossage ("invalid operand code '%c'" , code); |
| 14574 | } |
| 14575 | } |
| 14576 | |
| 14577 | if (REG_P (x)) |
| 14578 | print_reg (x, code, file); |
| 14579 | |
| 14580 | else if (MEM_P (x)) |
| 14581 | { |
| 14582 | rtx addr = XEXP (x, 0); |
| 14583 | |
| 14584 | /* No `byte ptr' prefix for call instructions ... */ |
| 14585 | if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') |
| 14586 | { |
| 14587 | machine_mode mode = GET_MODE (x); |
| 14588 | const char *size; |
| 14589 | |
| 14590 | /* Check for explicit size override codes. */ |
| 14591 | if (code == 'b') |
| 14592 | size = "BYTE" ; |
| 14593 | else if (code == 'w') |
| 14594 | size = "WORD" ; |
| 14595 | else if (code == 'k') |
| 14596 | size = "DWORD" ; |
| 14597 | else if (code == 'q') |
| 14598 | size = "QWORD" ; |
| 14599 | else if (code == 'x') |
| 14600 | size = "XMMWORD" ; |
| 14601 | else if (code == 't') |
| 14602 | size = "YMMWORD" ; |
| 14603 | else if (code == 'g') |
| 14604 | size = "ZMMWORD" ; |
| 14605 | else if (mode == BLKmode) |
| 14606 | /* ... or BLKmode operands, when not overridden. */ |
| 14607 | size = NULL; |
| 14608 | else |
| 14609 | switch (GET_MODE_SIZE (mode)) |
| 14610 | { |
| 14611 | case 1: size = "BYTE" ; break; |
| 14612 | case 2: size = "WORD" ; break; |
| 14613 | case 4: size = "DWORD" ; break; |
| 14614 | case 8: size = "QWORD" ; break; |
| 14615 | case 12: size = "TBYTE" ; break; |
| 14616 | case 16: |
| 14617 | if (mode == XFmode) |
| 14618 | size = "TBYTE" ; |
| 14619 | else |
| 14620 | size = "XMMWORD" ; |
| 14621 | break; |
| 14622 | case 32: size = "YMMWORD" ; break; |
| 14623 | case 64: size = "ZMMWORD" ; break; |
| 14624 | default: |
| 14625 | gcc_unreachable (); |
| 14626 | } |
| 14627 | if (size) |
| 14628 | { |
| 14629 | fputs (s: size, stream: file); |
| 14630 | fputs (s: " PTR " , stream: file); |
| 14631 | } |
| 14632 | } |
| 14633 | |
| 14634 | if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) |
| 14635 | output_operand_lossage ("invalid constraints for operand" ); |
| 14636 | else |
| 14637 | ix86_print_operand_address_as |
| 14638 | (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); |
| 14639 | } |
| 14640 | |
| 14641 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode) |
| 14642 | { |
| 14643 | long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x), |
| 14644 | REAL_MODE_FORMAT (HFmode)); |
| 14645 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14646 | putc (c: '$', stream: file); |
| 14647 | fprintf (stream: file, format: "0x%04x" , (unsigned int) l); |
| 14648 | } |
| 14649 | |
| 14650 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) |
| 14651 | { |
| 14652 | long l; |
| 14653 | |
| 14654 | REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); |
| 14655 | |
| 14656 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14657 | putc (c: '$', stream: file); |
| 14658 | /* Sign extend 32bit SFmode immediate to 8 bytes. */ |
| 14659 | if (code == 'q') |
| 14660 | fprintf (stream: file, format: "0x%08" HOST_LONG_LONG_FORMAT "x" , |
| 14661 | (unsigned long long) (int) l); |
| 14662 | else |
| 14663 | fprintf (stream: file, format: "0x%08x" , (unsigned int) l); |
| 14664 | } |
| 14665 | |
| 14666 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) |
| 14667 | { |
| 14668 | long l[2]; |
| 14669 | |
| 14670 | REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); |
| 14671 | |
| 14672 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14673 | putc (c: '$', stream: file); |
| 14674 | fprintf (stream: file, format: "0x%lx%08lx" , l[1] & 0xffffffff, l[0] & 0xffffffff); |
| 14675 | } |
| 14676 | |
| 14677 | /* These float cases don't actually occur as immediate operands. */ |
| 14678 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) |
| 14679 | { |
| 14680 | char dstr[30]; |
| 14681 | |
| 14682 | real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); |
| 14683 | fputs (s: dstr, stream: file); |
| 14684 | } |
| 14685 | |
| 14686 | /* Print bcst_mem_operand. */ |
| 14687 | else if (GET_CODE (x) == VEC_DUPLICATE) |
| 14688 | { |
| 14689 | machine_mode vmode = GET_MODE (x); |
| 14690 | /* Must be bcst_memory_operand. */ |
| 14691 | gcc_assert (bcst_mem_operand (x, vmode)); |
| 14692 | |
| 14693 | rtx mem = XEXP (x,0); |
| 14694 | ix86_print_operand (file, x: mem, code: 0); |
| 14695 | |
| 14696 | switch (vmode) |
| 14697 | { |
| 14698 | case E_V2DImode: |
| 14699 | case E_V2DFmode: |
| 14700 | fputs (s: "{1to2}" , stream: file); |
| 14701 | break; |
| 14702 | case E_V4SImode: |
| 14703 | case E_V4SFmode: |
| 14704 | case E_V4DImode: |
| 14705 | case E_V4DFmode: |
| 14706 | fputs (s: "{1to4}" , stream: file); |
| 14707 | break; |
| 14708 | case E_V8SImode: |
| 14709 | case E_V8SFmode: |
| 14710 | case E_V8DFmode: |
| 14711 | case E_V8DImode: |
| 14712 | case E_V8HFmode: |
| 14713 | fputs (s: "{1to8}" , stream: file); |
| 14714 | break; |
| 14715 | case E_V16SFmode: |
| 14716 | case E_V16SImode: |
| 14717 | case E_V16HFmode: |
| 14718 | fputs (s: "{1to16}" , stream: file); |
| 14719 | break; |
| 14720 | case E_V32HFmode: |
| 14721 | fputs (s: "{1to32}" , stream: file); |
| 14722 | break; |
| 14723 | default: |
| 14724 | gcc_unreachable (); |
| 14725 | } |
| 14726 | } |
| 14727 | |
| 14728 | else |
| 14729 | { |
| 14730 | /* We have patterns that allow zero sets of memory, for instance. |
| 14731 | In 64-bit mode, we should probably support all 8-byte vectors, |
| 14732 | since we can in fact encode that into an immediate. */ |
| 14733 | if (CONST_VECTOR_P (x)) |
| 14734 | { |
| 14735 | if (x != CONST0_RTX (GET_MODE (x))) |
| 14736 | output_operand_lossage ("invalid vector immediate" ); |
| 14737 | x = const0_rtx; |
| 14738 | } |
| 14739 | |
| 14740 | if (code == 'P') |
| 14741 | { |
| 14742 | if (ix86_force_load_from_GOT_p (x, call_p: true)) |
| 14743 | { |
| 14744 | /* For inline assembly statement, load function address |
| 14745 | from GOT with 'P' operand modifier to avoid PLT. */ |
| 14746 | x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), |
| 14747 | (TARGET_64BIT |
| 14748 | ? UNSPEC_GOTPCREL |
| 14749 | : UNSPEC_GOT)); |
| 14750 | x = gen_rtx_CONST (Pmode, x); |
| 14751 | x = gen_const_mem (Pmode, x); |
| 14752 | ix86_print_operand (file, x, code: 'A'); |
| 14753 | return; |
| 14754 | } |
| 14755 | } |
| 14756 | else if (code != 'p') |
| 14757 | { |
| 14758 | if (CONST_INT_P (x)) |
| 14759 | { |
| 14760 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14761 | putc (c: '$', stream: file); |
| 14762 | } |
| 14763 | else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x) |
| 14764 | || LABEL_REF_P (x)) |
| 14765 | { |
| 14766 | if (ASSEMBLER_DIALECT == ASM_ATT) |
| 14767 | putc (c: '$', stream: file); |
| 14768 | else |
| 14769 | fputs (s: "OFFSET FLAT:" , stream: file); |
| 14770 | } |
| 14771 | } |
| 14772 | if (CONST_INT_P (x)) |
| 14773 | fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); |
| 14774 | else if (flag_pic || MACHOPIC_INDIRECT) |
| 14775 | output_pic_addr_const (file, x, code); |
| 14776 | else |
| 14777 | output_addr_const (file, x); |
| 14778 | } |
| 14779 | } |
| 14780 | |
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true iff CODE
   is one of the punctuation operand modifiers the x86 backend
   understands in output templates.  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '*': case '+': case '&': case ';':
    case '~': case '^': case '!':
      return true;
    default:
      return false;
    }
}
| 14787 | |
/* Print a memory operand whose address is ADDR to FILE.  AS is the
   address space (generic, or an fs:/gs: segment override); RAW
   suppresses the segment prefix and the RIP-relative shortening used
   in 64-bit mode.  */

static void
ix86_print_operand_address_as (FILE *file, rtx addr,
			       addr_space_t as, bool raw)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  /* Register-name size modifier passed down to print_reg:
     0 = natural size, 'k' = force 32-bit names (addr32 prefix),
     'q' = force 64-bit names.  */
  int code = 0;

  /* VSIB (gather/scatter) addresses wrap the scalar part, the vector
     index register and the scale in an UNSPEC; peel that off and
     splice index/scale back into the decomposed parts.  */
  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      /* 64-bit LEA address: print registers with their 64-bit names.  */
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, out: &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* The segment may come from the caller or from decomposition,
     but never from both.  */
  if (ADDR_SPACE_GENERIC_P (as))
    as = parts.seg;
  else
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  /* Emit the fs:/gs: segment prefix unless raw output was requested.  */
  if (!ADDR_SPACE_GENERIC_P (as) && !raw)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc (c: '%', stream: file);

      switch (as)
	{
	case ADDR_SPACE_SEG_FS:
	  fputs (s: "fs:" , stream: file);
	  break;
	case ADDR_SPACE_SEG_GS:
	  fputs (s: "gs:" , stream: file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index && !raw)
    {
      rtx symbol = disp;

      /* Look through CONST (PLUS symbol const_int) to the symbol.  */
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      /* TLS references must keep their absolute form; everything else
	 can be printed as symbol(%rip) by faking BASE = pc.  */
      if (LABEL_REF_P (symbol)
	  || (SYMBOL_REF_P (symbol)
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  /* In Intel syntax a bare constant needs an explicit ds: so
	     the assembler parses it as an address, not an immediate.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
	    fputs (s: "ds:" , stream: file);
	  fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	output_pic_addr_const (file, x: disp, code: 0);
      else if (flag_pic)
	output_pic_addr_const (file, x: disp, code: 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	{
	  if (flag_checking)
	    {
	      /* Sanity-check the only forms SImode_address_operand
		 accepts in 64-bit mode.  */
	      gcc_assert (TARGET_64BIT);
	      switch (GET_CODE (addr))
		{
		case SUBREG:
		  gcc_assert (GET_MODE (addr) == SImode);
		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
		  break;
		case ZERO_EXTEND:
		case AND:
		  gcc_assert (GET_MODE (addr) == DImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  gcc_assert (!code);
	  code = 'k';
	}
      else if (code == 0
	       && TARGET_X32
	       && disp
	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	{
	  /* X32 runs in 64-bit mode, where displacement, DISP, in
	     address DISP(%r64), is encoded as 32-bit immediate sign-
	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
	     which is invalid for x32.  The correct address is %r64
	     - 0x40000300 == 0xf7ffdd64.  To properly encode
	     -0x40000300(%r64) for x32, we zero-extend negative
	     displacement by forcing addr32 prefix which truncates
	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	     zero-extend all negative displacements, including -1(%rsp).
	     However, for small negative displacements, sign-extension
	     won't cause overflow.  We only zero-extend negative
	     displacements if they < -16*1024*1024, which is also used
	     to check legitimate address displacements for PIC.  */
	  code = 'k';
	}

      /* Since the upper 32 bits of RSP are always zero for x32,
	 we can encode %esp as %rsp to avoid 0x67 prefix if
	 there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
	code = 'q';

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, x: disp, code: 0);
	      else if (LABEL_REF_P (disp))
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc (c: '(', stream: file);
	  if (base)
	    print_reg (x: base, code, file);
	  if (index)
	    {
	      putc (c: ',', stream: file);
	      print_reg (x: index, code: vsib ? 0 : code, file);
	      /* VSIB always spells out the scale explicitly.  */
	      if (scale != 1 || vsib)
		fprintf (stream: file, format: ",%d" , scale);
	    }
	  putc (c: ')', stream: file);
	}
      else
	{
	  /* Intel syntax: [base+index*scale+offset].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, x: disp, code: 0);
	      else if (LABEL_REF_P (disp))
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		/* Pure numeric displacement goes inside the brackets.  */
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc (c: '[', stream: file);
	  if (base)
	    {
	      print_reg (x: base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc (c: '+', stream: file);
		  fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    /* No base, index only: Intel syntax still needs a term.  */
	    putc (c: '0', stream: file);

	  if (index)
	    {
	      putc (c: '+', stream: file);
	      print_reg (x: index, code: vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (stream: file, format: "*%d" , scale);
	    }
	  putc (c: ']', stream: file);
	}
    }
}
| 15019 | |
| 15020 | static void |
| 15021 | ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) |
| 15022 | { |
| 15023 | if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) |
| 15024 | output_operand_lossage ("invalid constraints for operand" ); |
| 15025 | else |
| 15026 | ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, raw: false); |
| 15027 | } |
| 15028 | |
| 15029 | /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ |
| 15030 | |
| 15031 | static bool |
| 15032 | (FILE *file, rtx x) |
| 15033 | { |
| 15034 | rtx op; |
| 15035 | |
| 15036 | if (GET_CODE (x) != UNSPEC) |
| 15037 | return false; |
| 15038 | |
| 15039 | op = XVECEXP (x, 0, 0); |
| 15040 | switch (XINT (x, 1)) |
| 15041 | { |
| 15042 | case UNSPEC_GOTOFF: |
| 15043 | output_addr_const (file, op); |
| 15044 | fputs (s: "@gotoff" , stream: file); |
| 15045 | break; |
| 15046 | case UNSPEC_GOTTPOFF: |
| 15047 | output_addr_const (file, op); |
| 15048 | /* FIXME: This might be @TPOFF in Sun ld. */ |
| 15049 | fputs (s: "@gottpoff" , stream: file); |
| 15050 | break; |
| 15051 | case UNSPEC_TPOFF: |
| 15052 | output_addr_const (file, op); |
| 15053 | fputs (s: "@tpoff" , stream: file); |
| 15054 | break; |
| 15055 | case UNSPEC_NTPOFF: |
| 15056 | output_addr_const (file, op); |
| 15057 | if (TARGET_64BIT) |
| 15058 | fputs (s: "@tpoff" , stream: file); |
| 15059 | else |
| 15060 | fputs (s: "@ntpoff" , stream: file); |
| 15061 | break; |
| 15062 | case UNSPEC_DTPOFF: |
| 15063 | output_addr_const (file, op); |
| 15064 | fputs (s: "@dtpoff" , stream: file); |
| 15065 | break; |
| 15066 | case UNSPEC_GOTNTPOFF: |
| 15067 | output_addr_const (file, op); |
| 15068 | if (TARGET_64BIT) |
| 15069 | fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
| 15070 | "@gottpoff(%rip)" : "@gottpoff[rip]" , stream: file); |
| 15071 | else |
| 15072 | fputs (s: "@gotntpoff" , stream: file); |
| 15073 | break; |
| 15074 | case UNSPEC_INDNTPOFF: |
| 15075 | output_addr_const (file, op); |
| 15076 | fputs (s: "@indntpoff" , stream: file); |
| 15077 | break; |
| 15078 | case UNSPEC_SECREL32: |
| 15079 | output_addr_const (file, op); |
| 15080 | fputs (s: "@secrel32" , stream: file); |
| 15081 | break; |
| 15082 | #if TARGET_MACHO |
| 15083 | case UNSPEC_MACHOPIC_OFFSET: |
| 15084 | output_addr_const (file, op); |
| 15085 | putc ('-', file); |
| 15086 | machopic_output_function_base_name (file); |
| 15087 | break; |
| 15088 | #endif |
| 15089 | |
| 15090 | default: |
| 15091 | return false; |
| 15092 | } |
| 15093 | |
| 15094 | return true; |
| 15095 | } |
| 15096 | |
| 15097 | |
| 15098 | /* Output code to perform a 387 binary operation in INSN, one of PLUS, |
| 15099 | MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] |
| 15100 | is the expression of the binary operation. The output may either be |
| 15101 | emitted here, or returned to the caller, like all output_* functions. |
| 15102 | |
| 15103 | There is no guarantee that the operands are the same mode, as they |
| 15104 | might be within FLOAT or FLOAT_EXTEND expressions. */ |
| 15105 | |
| 15106 | #ifndef SYSV386_COMPAT |
| 15107 | /* Set to 1 for compatibility with brain-damaged assemblers. No-one |
| 15108 | wants to fix the assemblers because that causes incompatibility |
| 15109 | with gcc. No-one wants to fix gcc because that causes |
| 15110 | incompatibility with assemblers... You can use the option of |
| 15111 | -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ |
| 15112 | #define SYSV386_COMPAT 1 |
| 15113 | #endif |
| 15114 | |
const char *
output_387_binary_op (rtx_insn *insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  /* Emit SSE scalar code when any operand lives in an SSE register,
     otherwise an x87 stack instruction.  */
  bool is_sse
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));

  /* Opcode prefix: "%v" lets output add the AVX 'v' when enabled;
     "fi" is the x87 form with an integer memory operand.  */
  if (is_sse)
    p = "%v";
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
    p = "fi";
  else
    p = "f";

  strcpy (dest: buf, src: p);

  /* Mnemonic root from the operation code.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      p = "add"; break;
    case MINUS:
      p = "sub"; break;
    case MULT:
      p = "mul"; break;
    case DIV:
      p = "div"; break;
    default:
      gcc_unreachable ();
    }

  strcat (dest: buf, src: p);

  if (is_sse)
    {
      /* Scalar width suffix, then the operand template -- AVX uses the
	 nondestructive three-operand form.  */
      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
      strcat (dest: buf, src: p);

      if (TARGET_AVX)
	p = "\t{%2, %1, %0|%0, %1, %2}";
      else
	p = "\t{%2, %0|%0, %2}";

      strcat (dest: buf, src: p);
      return buf;
    }

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (flag_checking)
    {
      if (STACK_REG_P (operands[0])
	  && ((REG_P (operands[1])
	       && REGNO (operands[0]) == REGNO (operands[1])
	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	      || (REG_P (operands[2])
		  && REGNO (operands[0]) == REGNO (operands[2])
		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
	; /* ok */
      else
	gcc_unreachable ();
    }

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the register matching operands[0]
	 is operands[1]; only one arrangement is handled below.  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	std::swap (a&: operands[1], b&: operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: a memory minuend/dividend needs the reversed
	 ("r") form of the instruction.  */
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (dest: buf, src: p);
  return buf;
}
| 15304 | |
| 15305 | /* Return needed mode for entity in optimize_mode_switching pass. */ |
| 15306 | |
| 15307 | static int |
| 15308 | ix86_dirflag_mode_needed (rtx_insn *insn) |
| 15309 | { |
| 15310 | if (CALL_P (insn)) |
| 15311 | { |
| 15312 | if (cfun->machine->func_type == TYPE_NORMAL) |
| 15313 | return X86_DIRFLAG_ANY; |
| 15314 | else |
| 15315 | /* No need to emit CLD in interrupt handler for TARGET_CLD. */ |
| 15316 | return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; |
| 15317 | } |
| 15318 | |
| 15319 | if (recog_memoized (insn) < 0) |
| 15320 | return X86_DIRFLAG_ANY; |
| 15321 | |
| 15322 | if (get_attr_type (insn) == TYPE_STR) |
| 15323 | { |
| 15324 | /* Emit cld instruction if stringops are used in the function. */ |
| 15325 | if (cfun->machine->func_type == TYPE_NORMAL) |
| 15326 | return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; |
| 15327 | else |
| 15328 | return X86_DIRFLAG_RESET; |
| 15329 | } |
| 15330 | |
| 15331 | return X86_DIRFLAG_ANY; |
| 15332 | } |
| 15333 | |
| 15334 | /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ |
| 15335 | |
| 15336 | static bool |
| 15337 | ix86_check_avx_upper_register (const_rtx exp) |
| 15338 | { |
| 15339 | /* construct_container may return a parallel with expr_list |
| 15340 | which contains the real reg and mode */ |
| 15341 | subrtx_iterator::array_type array; |
| 15342 | FOR_EACH_SUBRTX (iter, array, exp, NONCONST) |
| 15343 | { |
| 15344 | const_rtx x = *iter; |
| 15345 | if (SSE_REG_P (x) |
| 15346 | && !EXT_REX_SSE_REG_P (x) |
| 15347 | && GET_MODE_BITSIZE (GET_MODE (x)) > 128) |
| 15348 | return true; |
| 15349 | } |
| 15350 | |
| 15351 | return false; |
| 15352 | } |
| 15353 | |
| 15354 | /* Check if a 256bit or 512bit AVX register is referenced in stores. */ |
| 15355 | |
| 15356 | static void |
| 15357 | ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) |
| 15358 | { |
| 15359 | if (SSE_REG_P (dest) |
| 15360 | && !EXT_REX_SSE_REG_P (dest) |
| 15361 | && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) |
| 15362 | { |
| 15363 | bool *used = (bool *) data; |
| 15364 | *used = true; |
| 15365 | } |
| 15366 | } |
| 15367 | |
| 15368 | /* Return needed mode for entity in optimize_mode_switching pass. */ |
| 15369 | |
static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  /* Debug insns never influence the upper-state requirement.  */
  if (DEBUG_INSN_P (insn))
    return AVX_U128_ANY;

  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit or 512bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (exp: arg))
		return AVX_U128_DIRTY;
	    }
	}

      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
	 nor 512bit registers used in the function return register.  */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores,
		   &avx_upper_reg_found);
      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function is known to preserve some SSE registers,
	 RA and previous passes can legitimately rely on that for
	 modes wider than 256 bits.  It's only safe to issue a
	 vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
	  /* Should be safe to issue an vzeroupper before sibling_call_p.
	     Also there not mode_exit for sibling_call, so there could be
	     missing vzeroupper for that.  */
	  || !(SIBLING_CALL_P (insn)
	       || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
					 y: abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);
      if (SSE_REG_P (dest)
	  && !EXT_REX_SSE_REG_P (dest)
	  && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
	{
	  /* This is an YMM/ZMM load.  Return AVX_U128_DIRTY if the
	     source isn't zero.  */
	  if (standard_sse_constant_p (x: src, GET_MODE (dest)) != 1)
	    return AVX_U128_DIRTY;
	  else
	    return AVX_U128_ANY;
	}
      else
	{
	  /* Reads of wide SSE registers also force DIRTY mode, to keep
	     the mode-switching pass from inserting a vzeroupper between
	     the producer and this consumer.  */
	  if (ix86_check_avx_upper_register (exp: src))
	    return AVX_U128_DIRTY;
	}

      /* This isn't YMM/ZMM load/store.  */
      return AVX_U128_ANY;
    }

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving optimal insertion
     point above eventual read from 256bit or 512 bit register.  */
  if (ix86_check_avx_upper_register (exp: PATTERN (insn)))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
| 15455 | |
| 15456 | /* Return mode that i387 must be switched into |
| 15457 | prior to the execution of insn. */ |
| 15458 | |
| 15459 | static int |
| 15460 | ix86_i387_mode_needed (int entity, rtx_insn *insn) |
| 15461 | { |
| 15462 | enum attr_i387_cw mode; |
| 15463 | |
| 15464 | /* The mode UNINITIALIZED is used to store control word after a |
| 15465 | function call or ASM pattern. The mode ANY specify that function |
| 15466 | has no requirements on the control word and make no changes in the |
| 15467 | bits we are interested in. */ |
| 15468 | |
| 15469 | if (CALL_P (insn) |
| 15470 | || (NONJUMP_INSN_P (insn) |
| 15471 | && (asm_noperands (PATTERN (insn)) >= 0 |
| 15472 | || GET_CODE (PATTERN (insn)) == ASM_INPUT))) |
| 15473 | return I387_CW_UNINITIALIZED; |
| 15474 | |
| 15475 | if (recog_memoized (insn) < 0) |
| 15476 | return I387_CW_ANY; |
| 15477 | |
| 15478 | mode = get_attr_i387_cw (insn); |
| 15479 | |
| 15480 | switch (entity) |
| 15481 | { |
| 15482 | case I387_ROUNDEVEN: |
| 15483 | if (mode == I387_CW_ROUNDEVEN) |
| 15484 | return mode; |
| 15485 | break; |
| 15486 | |
| 15487 | case I387_TRUNC: |
| 15488 | if (mode == I387_CW_TRUNC) |
| 15489 | return mode; |
| 15490 | break; |
| 15491 | |
| 15492 | case I387_FLOOR: |
| 15493 | if (mode == I387_CW_FLOOR) |
| 15494 | return mode; |
| 15495 | break; |
| 15496 | |
| 15497 | case I387_CEIL: |
| 15498 | if (mode == I387_CW_CEIL) |
| 15499 | return mode; |
| 15500 | break; |
| 15501 | |
| 15502 | default: |
| 15503 | gcc_unreachable (); |
| 15504 | } |
| 15505 | |
| 15506 | return I387_CW_ANY; |
| 15507 | } |
| 15508 | |
| 15509 | /* Return mode that entity must be switched into |
| 15510 | prior to the execution of insn. */ |
| 15511 | |
| 15512 | static int |
| 15513 | ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) |
| 15514 | { |
| 15515 | switch (entity) |
| 15516 | { |
| 15517 | case X86_DIRFLAG: |
| 15518 | return ix86_dirflag_mode_needed (insn); |
| 15519 | case AVX_U128: |
| 15520 | return ix86_avx_u128_mode_needed (insn); |
| 15521 | case I387_ROUNDEVEN: |
| 15522 | case I387_TRUNC: |
| 15523 | case I387_FLOOR: |
| 15524 | case I387_CEIL: |
| 15525 | return ix86_i387_mode_needed (entity, insn); |
| 15526 | default: |
| 15527 | gcc_unreachable (); |
| 15528 | } |
| 15529 | return 0; |
| 15530 | } |
| 15531 | |
| 15532 | /* Calculate mode of upper 128bit AVX registers after the insn. */ |
| 15533 | |
static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  /* An explicit vzeroupper or vzeroall always leaves the upper
     halves clean.  */
  if (vzeroupper_pattern (pat, VOIDmode)
      || vzeroall_pattern (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the callee doesn't clobber any SSE register, or clobbers
	 only their low 128-bit parts, then no vzeroupper is issued
	 before the function exit, so the state after the call is ANY
	 rather than CLEAN.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (!(SIBLING_CALL_P (insn)
	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
				      y: abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */
  return mode;
}
| 15570 | |
| 15571 | /* Return the mode that an insn results in. */ |
| 15572 | |
| 15573 | static int |
| 15574 | ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET) |
| 15575 | { |
| 15576 | switch (entity) |
| 15577 | { |
| 15578 | case X86_DIRFLAG: |
| 15579 | return mode; |
| 15580 | case AVX_U128: |
| 15581 | return ix86_avx_u128_mode_after (mode, insn); |
| 15582 | case I387_ROUNDEVEN: |
| 15583 | case I387_TRUNC: |
| 15584 | case I387_FLOOR: |
| 15585 | case I387_CEIL: |
| 15586 | return mode; |
| 15587 | default: |
| 15588 | gcc_unreachable (); |
| 15589 | } |
| 15590 | } |
| 15591 | |
| 15592 | static int |
| 15593 | ix86_dirflag_mode_entry (void) |
| 15594 | { |
| 15595 | /* For TARGET_CLD or in the interrupt handler we can't assume |
| 15596 | direction flag state at function entry. */ |
| 15597 | if (TARGET_CLD |
| 15598 | || cfun->machine->func_type != TYPE_NORMAL) |
| 15599 | return X86_DIRFLAG_ANY; |
| 15600 | |
| 15601 | return X86_DIRFLAG_RESET; |
| 15602 | } |
| 15603 | |
| 15604 | static int |
| 15605 | ix86_avx_u128_mode_entry (void) |
| 15606 | { |
| 15607 | tree arg; |
| 15608 | |
| 15609 | /* Entry mode is set to AVX_U128_DIRTY if there are |
| 15610 | 256bit or 512bit modes used in function arguments. */ |
| 15611 | for (arg = DECL_ARGUMENTS (current_function_decl); arg; |
| 15612 | arg = TREE_CHAIN (arg)) |
| 15613 | { |
| 15614 | rtx incoming = DECL_INCOMING_RTL (arg); |
| 15615 | |
| 15616 | if (incoming && ix86_check_avx_upper_register (exp: incoming)) |
| 15617 | return AVX_U128_DIRTY; |
| 15618 | } |
| 15619 | |
| 15620 | return AVX_U128_CLEAN; |
| 15621 | } |
| 15622 | |
| 15623 | /* Return a mode that ENTITY is assumed to be |
| 15624 | switched to at function entry. */ |
| 15625 | |
| 15626 | static int |
| 15627 | ix86_mode_entry (int entity) |
| 15628 | { |
| 15629 | switch (entity) |
| 15630 | { |
| 15631 | case X86_DIRFLAG: |
| 15632 | return ix86_dirflag_mode_entry (); |
| 15633 | case AVX_U128: |
| 15634 | return ix86_avx_u128_mode_entry (); |
| 15635 | case I387_ROUNDEVEN: |
| 15636 | case I387_TRUNC: |
| 15637 | case I387_FLOOR: |
| 15638 | case I387_CEIL: |
| 15639 | return I387_CW_ANY; |
| 15640 | default: |
| 15641 | gcc_unreachable (); |
| 15642 | } |
| 15643 | } |
| 15644 | |
| 15645 | static int |
| 15646 | ix86_avx_u128_mode_exit (void) |
| 15647 | { |
| 15648 | rtx reg = crtl->return_rtx; |
| 15649 | |
| 15650 | /* Exit mode is set to AVX_U128_DIRTY if there are 256bit |
| 15651 | or 512 bit modes used in the function return register. */ |
| 15652 | if (reg && ix86_check_avx_upper_register (exp: reg)) |
| 15653 | return AVX_U128_DIRTY; |
| 15654 | |
| 15655 | /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit |
| 15656 | modes used in function arguments, otherwise return AVX_U128_CLEAN. |
| 15657 | */ |
| 15658 | return ix86_avx_u128_mode_entry (); |
| 15659 | } |
| 15660 | |
| 15661 | /* Return a mode that ENTITY is assumed to be |
| 15662 | switched to at function exit. */ |
| 15663 | |
| 15664 | static int |
| 15665 | ix86_mode_exit (int entity) |
| 15666 | { |
| 15667 | switch (entity) |
| 15668 | { |
| 15669 | case X86_DIRFLAG: |
| 15670 | return X86_DIRFLAG_ANY; |
| 15671 | case AVX_U128: |
| 15672 | return ix86_avx_u128_mode_exit (); |
| 15673 | case I387_ROUNDEVEN: |
| 15674 | case I387_TRUNC: |
| 15675 | case I387_FLOOR: |
| 15676 | case I387_CEIL: |
| 15677 | return I387_CW_ANY; |
| 15678 | default: |
| 15679 | gcc_unreachable (); |
| 15680 | } |
| 15681 | } |
| 15682 | |
/* Return the priority of mode N for an entity (first argument, unused):
   higher-numbered modes are preferred, so the entity's mode enumeration
   order determines priority.  */

static int
ix86_mode_priority (int, int n)
{
  return n;
}
| 15688 | |
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  MODE selects the rounding mode to be prepared:
   the current control word is saved in SLOT_CW_STORED and a modified
   copy is stored in the slot corresponding to MODE.  */

static void
emit_i387_cw_initialization (int mode)
{
  /* Stack slot holding the original (current) control word.  */
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Save the current control word, then load it into REG so the
     rounding-control bits (mask 0x0c00) can be rewritten.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  switch (mode)
    {
    case I387_CW_ROUNDEVEN:
      /* round to nearest */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      slot = SLOT_CW_ROUNDEVEN;
      break;

    case I387_CW_TRUNC:
      /* round toward zero (truncate) */
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
      slot = SLOT_CW_TRUNC;
      break;

    case I387_CW_FLOOR:
      /* round down toward -oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
      slot = SLOT_CW_FLOOR;
      break;

    case I387_CW_CEIL:
      /* round up toward +oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
      slot = SLOT_CW_CEIL;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  /* Store the modified control word into its dedicated slot; the
     fldcw in the insn patterns reloads it from there.  */
  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
| 15743 | |
| 15744 | /* Generate one or more insns to set ENTITY to MODE. */ |
| 15745 | |
| 15746 | static void |
| 15747 | ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, |
| 15748 | HARD_REG_SET regs_live ATTRIBUTE_UNUSED) |
| 15749 | { |
| 15750 | switch (entity) |
| 15751 | { |
| 15752 | case X86_DIRFLAG: |
| 15753 | if (mode == X86_DIRFLAG_RESET) |
| 15754 | emit_insn (gen_cld ()); |
| 15755 | break; |
| 15756 | case AVX_U128: |
| 15757 | if (mode == AVX_U128_CLEAN) |
| 15758 | ix86_expand_avx_vzeroupper (); |
| 15759 | break; |
| 15760 | case I387_ROUNDEVEN: |
| 15761 | case I387_TRUNC: |
| 15762 | case I387_FLOOR: |
| 15763 | case I387_CEIL: |
| 15764 | if (mode != I387_CW_ANY |
| 15765 | && mode != I387_CW_UNINITIALIZED) |
| 15766 | emit_i387_cw_initialization (mode); |
| 15767 | break; |
| 15768 | default: |
| 15769 | gcc_unreachable (); |
| 15770 | } |
| 15771 | } |
| 15772 | |
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  Returns the assembler template (or ""
   after emitting the insns directly).  */

const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
  bool dimode_p = GET_MODE (operands[0]) == DImode;
  /* Which rounding-control stack slot this insn wants (attribute on
     the insn pattern); I387_CW_ANY means no fldcw is needed.  */
  int round_mode = get_attr_i387_cw (insn);

  static char buf[40];
  const char *p;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1" , operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  /* fisttp always truncates and always pops, so no control-word
     shuffling is required.  */
  if (fisttp)
    return "fisttp%Z0\t%0" ;

  strcpy (dest: buf, src: "fist" );

  /* Switch to the rounding mode this insn needs ...  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%3" , operands);

  /* Append the popping suffix "p" only when the value must be popped
     (skipping the leading 'p' of the template otherwise).  */
  p = "p%Z0\t%0" ;
  strcat (dest: buf, src: p + !(stack_top_dies || dimode_p));

  output_asm_insn (buf, operands);

  /* ... and restore the previous control word afterwards.  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%2" , operands);

  return "" ;
}
| 15815 | |
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0" ;
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      /* The assembler doesn't know the ffreep mnemonic, so emit the
	 raw encoding: 0xdf 0xc0+i is ffreep %st(i) (bytes swapped in
	 the .short because x86 is little-endian).  */
      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf" , regno);
      return retval;
    }
#endif

  /* Without ffreep, fstp to the register itself pops the stack.  */
  return opno ? "fstp\t%y1" : "fstp\t%y0" ;
}
| 15842 | |
| 15843 | |
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used (result directly in the flags register).  UNORDERED_P
   is true when fucom should be used (no fault on quiet NaNs).  */

const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
		   bool eflags_p, bool unordered_p)
{
  /* For the fcomi forms operand 0 is the first compare input; for the
     fnstsw forms operand 0 receives the status word, so the compare
     inputs start at operand 1.  */
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      p = unordered_p ? "fucomi" : "fcomi" ;
      strcpy (dest: buf, src: p);

      /* Use the popping form only when the stack top dies (skip the
	 leading 'p' otherwise).  */
      p = "p\t{%y1, %0|%0, %y1}" ;
      strcat (dest: buf, src: p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp" ;
      strcpy (dest: buf, src: p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      /* Comparison against literal zero uses ftst, which has no
	 unordered variant.  */
      gcc_assert (!unordered_p);
      strcpy (dest: buf, src: "ftst" );
    }
  else
    {
      /* An integer memory operand needs the integer-compare form.  */
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
	{
	  gcc_assert (!unordered_p);
	  p = "ficom" ;
	}
      else
	p = unordered_p ? "fucom" : "fcom" ;

      strcpy (dest: buf, src: p);

      /* Pop the stack top only if it dies.  */
      p = "p%Z2\t%y2" ;
      strcat (dest: buf, src: p + !stack_top_dies);
    }

  output_asm_insn (buf, operands);
  /* The non-fcomi forms leave the result in the FP status word; copy
     it out with fnstsw.  */
  return "fnstsw\t%0" ;
}
| 15908 | |
/* Output one absolute jump-table (ADDR_VEC) element referring to local
   label number VALUE into FILE, using .quad for 64-bit pointers when
   the assembler supports it.  */

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_LP64)
    directive = ASM_QUAD;
#else
  /* Without ASM_QUAD we cannot emit 64-bit table entries at all.  */
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (stream: file, format: "%s%s%d\n" , directive, LPREFIX, value);
}
| 15923 | |
/* Output one relative jump-table (ADDR_DIFF_VEC) element into FILE:
   the difference between local label VALUE and the anchor label REL
   (or a PIC-register-relative expression where label differences are
   not directly expressible).  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
    fprintf (stream: file, format: "%s%s%d-%s%d\n" ,
	     directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Mach-O expresses the entry relative to the function base.  */
      fprintf (file, ASM_LONG "%s%d-" , LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (stream: file, ASM_LONG "%s%d@GOTOFF\n" , LPREFIX, value);
  else
    /* Fall back to an expression relative to the GOT symbol.  */
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n" ,
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
| 15953 | |
| 15954 | #define LEA_MAX_STALL (3) |
| 15955 | #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) |
| 15956 | |
| 15957 | /* Increase given DISTANCE in half-cycles according to |
| 15958 | dependencies between PREV and NEXT instructions. |
| 15959 | Add 1 half-cycle if there is no dependency and |
| 15960 | go to next cycle if there is some dependecy. */ |
| 15961 | |
| 15962 | static unsigned int |
| 15963 | increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) |
| 15964 | { |
| 15965 | df_ref def, use; |
| 15966 | |
| 15967 | if (!prev || !next) |
| 15968 | return distance + (distance & 1) + 2; |
| 15969 | |
| 15970 | if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) |
| 15971 | return distance + 1; |
| 15972 | |
| 15973 | FOR_EACH_INSN_USE (use, next) |
| 15974 | FOR_EACH_INSN_DEF (def, prev) |
| 15975 | if (!DF_REF_IS_ARTIFICIAL (def) |
| 15976 | && DF_REF_REGNO (use) == DF_REF_REGNO (def)) |
| 15977 | return distance + (distance & 1) + 2; |
| 15978 | |
| 15979 | return distance + 1; |
| 15980 | } |
| 15981 | |
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  Artificial (dataflow-synthesized) defs are
   ignored.  */

bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx_insn *insn)
{
  df_ref def;

  FOR_EACH_INSN_DEF (def, insn)
    if (DF_REF_REG_DEF_P (def)
	&& !DF_REF_IS_ARTIFICIAL (def)
	&& (regno1 == DF_REF_REGNO (def)
	    || regno2 == DF_REF_REGNO (def)))
      return true;

  return false;
}
| 16000 | |
| 16001 | /* Function checks if instruction INSN uses register number |
| 16002 | REGNO as a part of address expression. */ |
| 16003 | |
| 16004 | static bool |
| 16005 | insn_uses_reg_mem (unsigned int regno, rtx insn) |
| 16006 | { |
| 16007 | df_ref use; |
| 16008 | |
| 16009 | FOR_EACH_INSN_USE (use, insn) |
| 16010 | if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) |
| 16011 | return true; |
| 16012 | |
| 16013 | return false; |
| 16014 | } |
| 16015 | |
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (insn: start) : NULL;
  rtx_insn *prev = start;
  /* NEXT trails PREV by one real insn; increase_distance uses the
     pair to detect dependencies between consecutive insns.  */
  rtx_insn *next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, insn: prev))
	    {
	      /* Unrecognized insns are conservatively treated as
		 non-LEA; only a LEA definition does not count as an
		 AGU-stall-relevant define.  */
	      if (recog_memoized (insn: prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (insn: prev);
    }

  return distance;
}
| 16064 | |
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbor BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN
   in full cycles (the half-cycle count is halved at the end).
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, start: PREV_INSN (insn),
					      found: &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-loop lets us continue the backward scan from the end
	 of the same block.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), found: &found);
      else
	{
	  /* Otherwise scan every predecessor and keep the shortest
	     positive distance found.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 found: &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found)
    return -1;

  /* Convert half-cycles to full cycles.  */
  return distance >> 1;
}
| 16135 | |
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO is set (redefined) first.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (insn: start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, insn: next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno1: regno, INVALID_REGNUM, insn: next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (insn: next);
    }

  return distance;
}
| 16200 | |
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno: regno0, insn, distance,
				       start: NEXT_INSN (insn),
				       found: &found, redefined: &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-loop lets us continue the forward scan from the head
	 of the same block.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno: regno0, insn,
					   distance, BB_HEAD (bb),
					   found: &found, redefined: &redefined);
      else
	{
	  /* Otherwise scan every successor and keep the shortest
	     positive distance found.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno: regno0, insn,
					  distance, BB_HEAD (e->dest),
					  found: &found_in_bb, redefined: &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  /* Convert half-cycles to full cycles.  */
  return distance >> 1;
}
| 16267 | |
/* Define this macro to tune the priority of LEA vs. ADD; it takes
   effect when there is a dilemma between choosing LEA or ADD:
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
| 16273 | #define IX86_LEA_PRIORITY 0 |
| 16274 | |
/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  REGNO0 is the
   destination register, REGNO1/REGNO2 the sources (INVALID_REGNUM if
   absent); HAS_SCALE is true when the address uses a scale > 1.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_CPU_P (BONNELL))
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  /* Remember recog_data content.  */
  struct recog_data_d recog_data_save = recog_data;

  /* Bonnell-only path: model the AGU stall by measuring distance to
     the defining insn and to the next address use.  */
  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call get_attr_type which can call
     recog_memoized, restore recog_data back to previous content.  */
  recog_data = recog_data_save;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory addess then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
| 16336 | |
| 16337 | /* Return true if we need to split op0 = op1 + op2 into a sequence of |
| 16338 | move and add to avoid AGU stalls. */ |
| 16339 | |
| 16340 | bool |
| 16341 | ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) |
| 16342 | { |
| 16343 | unsigned int regno0, regno1, regno2; |
| 16344 | |
| 16345 | /* Check if we need to optimize. */ |
| 16346 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
| 16347 | return false; |
| 16348 | |
| 16349 | regno0 = true_regnum (operands[0]); |
| 16350 | regno1 = true_regnum (operands[1]); |
| 16351 | regno2 = true_regnum (operands[2]); |
| 16352 | |
| 16353 | /* We need to split only adds with non destructive |
| 16354 | destination operand. */ |
| 16355 | if (regno0 == regno1 || regno0 == regno2) |
| 16356 | return false; |
| 16357 | else |
| 16358 | return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 1, has_scale: false); |
| 16359 | } |
| 16360 | |
| 16361 | /* Return true if we should emit lea instruction instead of mov |
| 16362 | instruction. */ |
| 16363 | |
| 16364 | bool |
| 16365 | ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) |
| 16366 | { |
| 16367 | unsigned int regno0, regno1; |
| 16368 | |
| 16369 | /* Check if we need to optimize. */ |
| 16370 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
| 16371 | return false; |
| 16372 | |
| 16373 | /* Use lea for reg to reg moves only. */ |
| 16374 | if (!REG_P (operands[0]) || !REG_P (operands[1])) |
| 16375 | return false; |
| 16376 | |
| 16377 | regno0 = true_regnum (operands[0]); |
| 16378 | regno1 = true_regnum (operands[1]); |
| 16379 | |
| 16380 | return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, split_cost: 0, has_scale: false); |
| 16381 | } |
| 16382 | |
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls during peephole2.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  ok = ix86_decompose_address (addr: operands[1], out: &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better.  */
  if (!TARGET_CPU_P (BONNELL)
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Split with -Oz if the encoding requires fewer bytes.  */
  if (optimize_size > 1
      && parts.scale > 1
      && !parts.base
      && (!parts.disp || parts.disp == const0_rtx))
    return true;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non desctructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    /* Destination overlaps the index: needs a much longer
	       reconstruction sequence.  */
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				has_scale: parts.scale > 1);
}
| 16485 | |
| 16486 | /* Return true if it is ok to optimize an ADD operation to LEA |
| 16487 | operation to avoid flag register consumation. For most processors, |
| 16488 | ADD is faster than LEA. For the processors like BONNELL, if the |
| 16489 | destination register of LEA holds an actual address which will be |
| 16490 | used soon, LEA is better and otherwise ADD is better. */ |
| 16491 | |
| 16492 | bool |
| 16493 | ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) |
| 16494 | { |
| 16495 | unsigned int regno0 = true_regnum (operands[0]); |
| 16496 | unsigned int regno1 = true_regnum (operands[1]); |
| 16497 | unsigned int regno2 = true_regnum (operands[2]); |
| 16498 | |
| 16499 | /* If a = b + c, (a!=b && a!=c), must use lea form. */ |
| 16500 | if (regno0 != regno1 && regno0 != regno2) |
| 16501 | return true; |
| 16502 | |
| 16503 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
| 16504 | return false; |
| 16505 | |
| 16506 | return ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 0, has_scale: false); |
| 16507 | } |
| 16508 | |
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  Recurses into PARALLEL bodies on either side; a match in
   any element suffices.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  Only a SET with a REG
     destination can produce a shift count.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      /* Check each element of the PARALLEL against USE_BODY.  */
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      /* Likewise recurse into each element of a PARALLEL use.  */
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Only shift and rotate codes carry a shift-count operand.  */
  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}
| 16577 | |
| 16578 | /* Return true if destination reg of SET_INSN is shift count of |
| 16579 | USE_INSN. */ |
| 16580 | |
| 16581 | bool |
| 16582 | ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) |
| 16583 | { |
| 16584 | return ix86_dep_by_shift_count_body (set_body: PATTERN (insn: set_insn), |
| 16585 | use_body: PATTERN (insn: use_insn)); |
| 16586 | } |
| 16587 | |
| 16588 | /* Return TRUE if the operands to a vec_interleave_{high,low}v2df |
| 16589 | are ok, keeping in mind the possible movddup alternative. */ |
| 16590 | |
| 16591 | bool |
| 16592 | ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) |
| 16593 | { |
| 16594 | if (MEM_P (operands[0])) |
| 16595 | return rtx_equal_p (operands[0], operands[1 + high]); |
| 16596 | if (MEM_P (operands[1]) && MEM_P (operands[2])) |
| 16597 | return false; |
| 16598 | return true; |
| 16599 | } |
| 16600 | |
| 16601 | /* A subroutine of ix86_build_signbit_mask. If VECT is true, |
| 16602 | then replicate the value for all elements of the vector |
| 16603 | register. */ |
| 16604 | |
| 16605 | rtx |
| 16606 | ix86_build_const_vector (machine_mode mode, bool vect, rtx value) |
| 16607 | { |
| 16608 | int i, n_elt; |
| 16609 | rtvec v; |
| 16610 | machine_mode scalar_mode; |
| 16611 | |
| 16612 | switch (mode) |
| 16613 | { |
| 16614 | case E_V64QImode: |
| 16615 | case E_V32QImode: |
| 16616 | case E_V16QImode: |
| 16617 | case E_V32HImode: |
| 16618 | case E_V16HImode: |
| 16619 | case E_V8HImode: |
| 16620 | case E_V16SImode: |
| 16621 | case E_V8SImode: |
| 16622 | case E_V4SImode: |
| 16623 | case E_V2SImode: |
| 16624 | case E_V8DImode: |
| 16625 | case E_V4DImode: |
| 16626 | case E_V2DImode: |
| 16627 | gcc_assert (vect); |
| 16628 | /* FALLTHRU */ |
| 16629 | case E_V2HFmode: |
| 16630 | case E_V4HFmode: |
| 16631 | case E_V8HFmode: |
| 16632 | case E_V16HFmode: |
| 16633 | case E_V32HFmode: |
| 16634 | case E_V16SFmode: |
| 16635 | case E_V8SFmode: |
| 16636 | case E_V4SFmode: |
| 16637 | case E_V2SFmode: |
| 16638 | case E_V8DFmode: |
| 16639 | case E_V4DFmode: |
| 16640 | case E_V2DFmode: |
| 16641 | case E_V32BFmode: |
| 16642 | case E_V16BFmode: |
| 16643 | case E_V8BFmode: |
| 16644 | case E_V4BFmode: |
| 16645 | case E_V2BFmode: |
| 16646 | n_elt = GET_MODE_NUNITS (mode); |
| 16647 | v = rtvec_alloc (n_elt); |
| 16648 | scalar_mode = GET_MODE_INNER (mode); |
| 16649 | |
| 16650 | RTVEC_ELT (v, 0) = value; |
| 16651 | |
| 16652 | for (i = 1; i < n_elt; ++i) |
| 16653 | RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode); |
| 16654 | |
| 16655 | return gen_rtx_CONST_VECTOR (mode, v); |
| 16656 | |
| 16657 | default: |
| 16658 | gcc_unreachable (); |
| 16659 | } |
| 16660 | } |
| 16661 | |
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  /* Pick the integer mode IMODE matching the element width; the mask
     constant is first materialized as an integer of that width.  */
  switch (mode)
    {
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      vec_mode = mode;
      imode = HImode;
      break;

    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V2SImode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_TImode:
    case E_TFmode:
      /* Scalar 128-bit case; VOIDmode here marks the non-vector path
	 taken below.  */
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  /* Build an integer with only the sign bit of one element set.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  /* For INVERT, use the complement: all bits except the sign bit.  */
  if (invert)
    w = wi::bit_not (x: w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  /* Scalar (TImode/TFmode) case: no vector to build.  */
  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (mode: vec_mode, vect, value: mask);
  return force_reg (vec_mode, v);
}
| 16739 | |
/* Return HOST_WIDE_INT for const vector OP in MODE.  The vector
   elements are packed into the result from least-significant bits
   upward (element 0 in the low bits).  Only vectors that fit in one
   word are supported.  */

HOST_WIDE_INT
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    gcc_unreachable ();

  int nunits = GET_MODE_NUNITS (mode);
  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
  machine_mode innermode = GET_MODE_INNER (mode);
  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);

  switch (mode)
    {
    case E_V2QImode:
    case E_V4QImode:
    case E_V2HImode:
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      /* Integer elements: insert each element's value at its bit
	 position.  */
      for (int i = 0; i < nunits; ++i)
	{
	  int v = INTVAL (XVECEXP (op, 0, i));
	  wide_int wv = wi::shwi (val: v, precision: innermode_bits);
	  val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
	}
      break;
    case E_V1SImode:
    case E_V1DImode:
      /* Single-element vectors: the sole element is the answer.  */
      op = CONST_VECTOR_ELT (op, 0);
      return INTVAL (op);
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SFmode:
      /* FP elements: use the target bit pattern of each value.  */
      for (int i = 0; i < nunits; ++i)
	{
	  rtx x = XVECEXP (op, 0, i);
	  int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
				  REAL_MODE_FORMAT (innermode));
	  wide_int wv = wi::shwi (val: v, precision: innermode_bits);
	  val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
	}
      break;
    default:
      gcc_unreachable ();
    }

  return val.to_shwi ();
}
| 16792 | |
| 16793 | int ix86_get_flags_cc (rtx_code code) |
| 16794 | { |
| 16795 | switch (code) |
| 16796 | { |
| 16797 | case NE: return X86_CCNE; |
| 16798 | case EQ: return X86_CCE; |
| 16799 | case GE: return X86_CCNL; |
| 16800 | case GT: return X86_CCNLE; |
| 16801 | case LE: return X86_CCLE; |
| 16802 | case LT: return X86_CCL; |
| 16803 | case GEU: return X86_CCNB; |
| 16804 | case GTU: return X86_CCNBE; |
| 16805 | case LEU: return X86_CCBE; |
| 16806 | case LTU: return X86_CCB; |
| 16807 | default: return -1; |
| 16808 | } |
| 16809 | } |
| 16810 | |
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  /* The SET of interest is the first element when the pattern is a
     PARALLEL (e.g. with a clobber attached).  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  /* The cases below fall through from less- to more-constrained modes:
     a flag mode satisfies REQ_MODE when REQ_MODE is the same or a
     strictly weaker requirement.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      /* CCNOmode also satisfies a CCmode request, but only for
	 comparisons against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    case E_CCGZmode:

    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      /* Single-flag modes only match themselves exactly.  */
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  /* Finally the source COMPARE must produce the same CC mode as the
     destination expects.  */
  return GET_MODE (SET_SRC (set)) == set_mode;
}
| 16868 | |
/* Return the condition-code mode that a comparison of OP0 and OP1
   with code CODE requires, i.e. the smallest set of flags the
   comparison depends on.  */

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  /* All scalar FP comparisons use the single FP compare mode.  */
  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      rtx geu;
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
	 Match LTU of op0
	 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
	 and op1
	 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
	 where CC_CCC is either CC or CCC.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (geu = XEXP (op0, 0)) == GEU
	       && REG_P (XEXP (geu, 0))
	       && (GET_MODE (XEXP (geu, 0)) == CCCmode
		   || GET_MODE (XEXP (geu, 0)) == CCmode)
	       && REGNO (XEXP (geu, 0)) == FLAGS_REG
	       && XEXP (geu, 1) == const0_rtx
	       && GET_CODE (op1) == LTU
	       && REG_P (XEXP (op1, 0))
	       && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	       && REGNO (XEXP (op1, 0)) == FLAGS_REG
	       && XEXP (op1, 1) == const0_rtx)
	return CCCmode;
      /* Similarly for *x86_cmc pattern.
	 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
	 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
	 It is sufficient to test that the operand modes are CCCmode.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (XEXP (op0, 0)) == LTU
	       && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
	       && GET_CODE (op1) == GEU
	       && GET_MODE (XEXP (op1, 0)) == CCCmode)
	return CCCmode;
      /* Similarly for the comparison of addcarry/subborrow pattern.  */
      else if (code == LTU
	       && GET_CODE (op0) == ZERO_EXTEND
	       && GET_CODE (op1) == PLUS
	       && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
	       && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
	return CCCmode;
      else
	/* No special pattern matched: need the full flag set.  */
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      /* CCmode should be used in all other cases.  */
      return CCmode;
    }
}
| 16962 | |
| 16963 | /* Return TRUE or FALSE depending on whether the ptest instruction |
| 16964 | INSN has source and destination with suitable matching CC modes. */ |
| 16965 | |
| 16966 | bool |
| 16967 | ix86_match_ptest_ccmode (rtx insn) |
| 16968 | { |
| 16969 | rtx set, src; |
| 16970 | machine_mode set_mode; |
| 16971 | |
| 16972 | set = PATTERN (insn); |
| 16973 | gcc_assert (GET_CODE (set) == SET); |
| 16974 | src = SET_SRC (set); |
| 16975 | gcc_assert (GET_CODE (src) == UNSPEC |
| 16976 | && XINT (src, 1) == UNSPEC_PTEST); |
| 16977 | |
| 16978 | set_mode = GET_MODE (src); |
| 16979 | if (set_mode != CCZmode |
| 16980 | && set_mode != CCCmode |
| 16981 | && set_mode != CCmode) |
| 16982 | return false; |
| 16983 | return GET_MODE (SET_DEST (set)) == set_mode; |
| 16984 | } |
| 16985 | |
| 16986 | /* Return the fixed registers used for condition codes. */ |
| 16987 | |
| 16988 | static bool |
| 16989 | ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) |
| 16990 | { |
| 16991 | *p1 = FLAGS_REG; |
| 16992 | *p2 = INVALID_REGNUM; |
| 16993 | return true; |
| 16994 | } |
| 16995 | |
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGOCmode is the weaker of the pair in both of these
     combinations; the stronger mode subsumes it.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  /* CCZmode (zero flag only) is satisfied by any of these richer
     modes, so the richer mode works for both.  */
  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  /* Any remaining pair of integer flag modes can fall back to the
     full CCmode; CCFPmode cannot mix with anything else.  */
  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGOCmode:
	case E_CCNOmode:
	case E_CCAmode:
	case E_CCCmode:
	case E_CCOmode:
	case E_CCPmode:
	case E_CCSmode:
	case E_CCZmode:
	  return CCmode;
	}

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
| 17063 | |
| 17064 | /* Return strategy to use for floating-point. We assume that fcomi is always |
| 17065 | preferrable where available, since that is also true when looking at size |
| 17066 | (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ |
| 17067 | |
| 17068 | enum ix86_fpcmp_strategy |
| 17069 | ix86_fp_comparison_strategy (enum rtx_code) |
| 17070 | { |
| 17071 | /* Do fcomi/sahf based test when profitable. */ |
| 17072 | |
| 17073 | if (TARGET_CMOVE) |
| 17074 | return IX86_FPCMP_COMI; |
| 17075 | |
| 17076 | if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) |
| 17077 | return IX86_FPCMP_SAHF; |
| 17078 | |
| 17079 | return IX86_FPCMP_ARITH; |
| 17080 | } |
| 17081 | |
| 17082 | /* Convert comparison codes we use to represent FP comparison to integer |
| 17083 | code that will result in proper branch. Return UNKNOWN if no such code |
| 17084 | is available. */ |
| 17085 | |
| 17086 | enum rtx_code |
| 17087 | ix86_fp_compare_code_to_integer (enum rtx_code code) |
| 17088 | { |
| 17089 | switch (code) |
| 17090 | { |
| 17091 | case GT: |
| 17092 | return GTU; |
| 17093 | case GE: |
| 17094 | return GEU; |
| 17095 | case ORDERED: |
| 17096 | case UNORDERED: |
| 17097 | return code; |
| 17098 | case UNEQ: |
| 17099 | return EQ; |
| 17100 | case UNLT: |
| 17101 | return LTU; |
| 17102 | case UNLE: |
| 17103 | return LEU; |
| 17104 | case LTGT: |
| 17105 | return NE; |
| 17106 | case EQ: |
| 17107 | case NE: |
| 17108 | if (TARGET_AVX10_2) |
| 17109 | return code; |
| 17110 | /* FALLTHRU. */ |
| 17111 | default: |
| 17112 | return UNKNOWN; |
| 17113 | } |
| 17114 | } |
| 17115 | |
| 17116 | /* Zero extend possibly SImode EXP to Pmode register. */ |
| 17117 | rtx |
| 17118 | ix86_zero_extend_to_Pmode (rtx exp) |
| 17119 | { |
| 17120 | return force_reg (Pmode, convert_to_mode (Pmode, exp, 1)); |
| 17121 | } |
| 17122 | |
| 17123 | /* Return true if the function is called via PLT. */ |
| 17124 | |
| 17125 | bool |
| 17126 | ix86_call_use_plt_p (rtx call_op) |
| 17127 | { |
| 17128 | if (SYMBOL_REF_LOCAL_P (call_op)) |
| 17129 | { |
| 17130 | if (SYMBOL_REF_DECL (call_op) |
| 17131 | && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL) |
| 17132 | { |
| 17133 | /* NB: All ifunc functions must be called via PLT. */ |
| 17134 | cgraph_node *node |
| 17135 | = cgraph_node::get (SYMBOL_REF_DECL (call_op)); |
| 17136 | if (node && node->ifunc_resolver) |
| 17137 | return true; |
| 17138 | } |
| 17139 | return false; |
| 17140 | } |
| 17141 | return true; |
| 17142 | } |
| 17143 | |
| 17144 | /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true, |
| 17145 | the PLT entry will be used as the function address for local IFUNC |
| 17146 | functions. When the PIC register is needed for PLT call, indirect |
| 17147 | call via the PLT entry will fail since the PIC register may not be |
| 17148 | set up properly for indirect call. In this case, we should return |
| 17149 | false. */ |
| 17150 | |
| 17151 | static bool |
| 17152 | ix86_ifunc_ref_local_ok (void) |
| 17153 | { |
| 17154 | return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC); |
| 17155 | } |
| 17156 | |
| 17157 | /* Return true if the function being called was marked with attribute |
| 17158 | "noplt" or using -fno-plt and we are compiling for non-PIC. We need |
| 17159 | to handle the non-PIC case in the backend because there is no easy |
| 17160 | interface for the front-end to force non-PLT calls to use the GOT. |
| 17161 | This is currently used only with 64-bit or 32-bit GOT32X ELF targets |
| 17162 | to call the function marked "noplt" indirectly. */ |
| 17163 | |
| 17164 | bool |
| 17165 | ix86_nopic_noplt_attribute_p (rtx call_op) |
| 17166 | { |
| 17167 | if (flag_pic || ix86_cmodel == CM_LARGE |
| 17168 | || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X) |
| 17169 | || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF |
| 17170 | || SYMBOL_REF_LOCAL_P (call_op)) |
| 17171 | return false; |
| 17172 | |
| 17173 | tree symbol_decl = SYMBOL_REF_DECL (call_op); |
| 17174 | |
| 17175 | if (!flag_plt |
| 17176 | || (symbol_decl != NULL_TREE |
| 17177 | && lookup_attribute (attr_name: "noplt" , DECL_ATTRIBUTES (symbol_decl)))) |
| 17178 | return true; |
| 17179 | |
| 17180 | return false; |
| 17181 | } |
| 17182 | |
| 17183 | /* Helper to output the jmp/call. */ |
| 17184 | static void |
| 17185 | ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno) |
| 17186 | { |
| 17187 | if (thunk_name != NULL) |
| 17188 | { |
| 17189 | if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno)) |
| 17190 | && ix86_indirect_branch_cs_prefix) |
| 17191 | fprintf (stream: asm_out_file, format: "\tcs\n" ); |
| 17192 | fprintf (stream: asm_out_file, format: "\tjmp\t" ); |
| 17193 | assemble_name (asm_out_file, thunk_name); |
| 17194 | putc (c: '\n', stream: asm_out_file); |
| 17195 | if ((ix86_harden_sls & harden_sls_indirect_jmp)) |
| 17196 | fputs (s: "\tint3\n" , stream: asm_out_file); |
| 17197 | } |
| 17198 | else |
| 17199 | output_indirect_thunk (regno); |
| 17200 | } |
| 17201 | |
| 17202 | /* Output indirect branch via a call and return thunk. CALL_OP is a |
| 17203 | register which contains the branch target. XASM is the assembly |
| 17204 | template for CALL_OP. Branch is a tail call if SIBCALL_P is true. |
| 17205 | A normal call is converted to: |
| 17206 | |
| 17207 | call __x86_indirect_thunk_reg |
| 17208 | |
| 17209 | and a tail call is converted to: |
| 17210 | |
| 17211 | jmp __x86_indirect_thunk_reg |
| 17212 | */ |
| 17213 | |
| 17214 | static void |
| 17215 | ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) |
| 17216 | { |
| 17217 | char thunk_name_buf[32]; |
| 17218 | char *thunk_name; |
| 17219 | enum indirect_thunk_prefix need_prefix |
| 17220 | = indirect_thunk_need_prefix (insn: current_output_insn); |
| 17221 | int regno = REGNO (call_op); |
| 17222 | |
| 17223 | if (cfun->machine->indirect_branch_type |
| 17224 | != indirect_branch_thunk_inline) |
| 17225 | { |
| 17226 | if (cfun->machine->indirect_branch_type == indirect_branch_thunk) |
| 17227 | SET_HARD_REG_BIT (set&: indirect_thunks_used, bit: regno); |
| 17228 | |
| 17229 | indirect_thunk_name (name: thunk_name_buf, regno, need_prefix, ret_p: false); |
| 17230 | thunk_name = thunk_name_buf; |
| 17231 | } |
| 17232 | else |
| 17233 | thunk_name = NULL; |
| 17234 | |
| 17235 | if (sibcall_p) |
| 17236 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
| 17237 | else |
| 17238 | { |
| 17239 | if (thunk_name != NULL) |
| 17240 | { |
| 17241 | if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno)) |
| 17242 | && ix86_indirect_branch_cs_prefix) |
| 17243 | fprintf (stream: asm_out_file, format: "\tcs\n" ); |
| 17244 | fprintf (stream: asm_out_file, format: "\tcall\t" ); |
| 17245 | assemble_name (asm_out_file, thunk_name); |
| 17246 | putc (c: '\n', stream: asm_out_file); |
| 17247 | return; |
| 17248 | } |
| 17249 | |
| 17250 | char indirectlabel1[32]; |
| 17251 | char indirectlabel2[32]; |
| 17252 | |
| 17253 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, |
| 17254 | INDIRECT_LABEL, |
| 17255 | indirectlabelno++); |
| 17256 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, |
| 17257 | INDIRECT_LABEL, |
| 17258 | indirectlabelno++); |
| 17259 | |
| 17260 | /* Jump. */ |
| 17261 | fputs (s: "\tjmp\t" , stream: asm_out_file); |
| 17262 | assemble_name_raw (asm_out_file, indirectlabel2); |
| 17263 | fputc (c: '\n', stream: asm_out_file); |
| 17264 | |
| 17265 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); |
| 17266 | |
| 17267 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
| 17268 | |
| 17269 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); |
| 17270 | |
| 17271 | /* Call. */ |
| 17272 | fputs (s: "\tcall\t" , stream: asm_out_file); |
| 17273 | assemble_name_raw (asm_out_file, indirectlabel1); |
| 17274 | fputc (c: '\n', stream: asm_out_file); |
| 17275 | } |
| 17276 | } |
| 17277 | |
/* Output indirect branch via a call and return thunk.  CALL_OP is
   the branch target.  XASM is the assembly template for CALL_OP.
   Branch is a tail call if SIBCALL_P is true.  A normal call is
   converted to:

	jmp L2
   L1:
	push CALL_OP
	jmp __x86_indirect_thunk
   L2:
	call L1

   and a tail call is converted to:

	push CALL_OP
	jmp __x86_indirect_thunk
 */

static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
				      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (insn: current_output_insn);
  /* -1 selects the register-less thunk name variant.  */
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      /* Using an out-of-line thunk: mark it as needed and compute
	 its name.  */
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	indirect_thunk_needed = true;
      indirect_thunk_name (name: thunk_name_buf, regno, need_prefix, ret_p: false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  /* The target is pushed on the stack; the thunk's ret consumes it.  */
  snprintf (s: push_buf, maxlen: sizeof (push_buf), format: "push{%c}\t%s" ,
	    TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      /* Normal call: build the jmp L2; L1: push+thunk; L2: call L1
	 trampoline.  */
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs (s: "\tjmp\t" , stream: asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc (c: '\n', stream: asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
	{
	  struct ix86_address parts;
	  rtx addr = XEXP (call_op, 0);
	  if (ix86_decompose_address (addr, out: &parts)
	      && parts.base == stack_pointer_rtx)
	    {
	      /* Since call will adjust stack by -UNITS_PER_WORD,
		 we must convert "disp(stack, index, scale)" to
		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
	      if (parts.index)
		{
		  addr = gen_rtx_MULT (Pmode, parts.index,
				       GEN_INT (parts.scale));
		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				       addr);
		}
	      else
		addr = stack_pointer_rtx;

	      rtx disp;
	      if (parts.disp != NULL_RTX)
		disp = plus_constant (Pmode, parts.disp,
				      UNITS_PER_WORD);
	      else
		disp = GEN_INT (UNITS_PER_WORD);

	      addr = gen_rtx_PLUS (Pmode, addr, disp);
	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
	    }
	}

      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs (s: "\tcall\t" , stream: asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc (c: '\n', stream: asm_out_file);
    }
}
| 17390 | |
| 17391 | /* Output indirect branch via a call and return thunk. CALL_OP is |
| 17392 | the branch target. XASM is the assembly template for CALL_OP. |
| 17393 | Branch is a tail call if SIBCALL_P is true. */ |
| 17394 | |
| 17395 | static void |
| 17396 | ix86_output_indirect_branch (rtx call_op, const char *xasm, |
| 17397 | bool sibcall_p) |
| 17398 | { |
| 17399 | if (REG_P (call_op)) |
| 17400 | ix86_output_indirect_branch_via_reg (call_op, sibcall_p); |
| 17401 | else |
| 17402 | ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); |
| 17403 | } |
| 17404 | |
| 17405 | /* Output indirect jump. CALL_OP is the jump target. */ |
| 17406 | |
| 17407 | const char * |
| 17408 | ix86_output_indirect_jmp (rtx call_op) |
| 17409 | { |
| 17410 | if (cfun->machine->indirect_branch_type != indirect_branch_keep) |
| 17411 | { |
| 17412 | /* We can't have red-zone since "call" in the indirect thunk |
| 17413 | pushes the return address onto stack, destroying red-zone. */ |
| 17414 | if (ix86_red_zone_used) |
| 17415 | gcc_unreachable (); |
| 17416 | |
| 17417 | ix86_output_indirect_branch (call_op, xasm: "%0" , sibcall_p: true); |
| 17418 | } |
| 17419 | else |
| 17420 | output_asm_insn ("%!jmp\t%A0" , &call_op); |
| 17421 | return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "" ; |
| 17422 | } |
| 17423 | |
| 17424 | /* Output return instrumentation for current function if needed. */ |
| 17425 | |
| 17426 | static void |
| 17427 | output_return_instrumentation (void) |
| 17428 | { |
| 17429 | if (ix86_instrument_return != instrument_return_none |
| 17430 | && flag_fentry |
| 17431 | && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) |
| 17432 | { |
| 17433 | if (ix86_flag_record_return) |
| 17434 | fprintf (stream: asm_out_file, format: "1:\n" ); |
| 17435 | switch (ix86_instrument_return) |
| 17436 | { |
| 17437 | case instrument_return_call: |
| 17438 | fprintf (stream: asm_out_file, format: "\tcall\t__return__\n" ); |
| 17439 | break; |
| 17440 | case instrument_return_nop5: |
| 17441 | /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ |
| 17442 | fprintf (stream: asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n" ); |
| 17443 | break; |
| 17444 | case instrument_return_none: |
| 17445 | break; |
| 17446 | } |
| 17447 | |
| 17448 | if (ix86_flag_record_return) |
| 17449 | { |
| 17450 | fprintf (stream: asm_out_file, format: "\t.section __return_loc, \"a\",@progbits\n" ); |
| 17451 | fprintf (stream: asm_out_file, format: "\t.%s 1b\n" , TARGET_64BIT ? "quad" : "long" ); |
| 17452 | fprintf (stream: asm_out_file, format: "\t.previous\n" ); |
| 17453 | } |
| 17454 | } |
| 17455 | } |
| 17456 | |
| 17457 | /* Output function return. CALL_OP is the jump target. Add a REP |
| 17458 | prefix to RET if LONG_P is true and function return is kept. */ |
| 17459 | |
| 17460 | const char * |
| 17461 | ix86_output_function_return (bool long_p) |
| 17462 | { |
| 17463 | output_return_instrumentation (); |
| 17464 | |
| 17465 | if (cfun->machine->function_return_type != indirect_branch_keep) |
| 17466 | { |
| 17467 | char thunk_name[32]; |
| 17468 | enum indirect_thunk_prefix need_prefix |
| 17469 | = indirect_thunk_need_prefix (insn: current_output_insn); |
| 17470 | |
| 17471 | if (cfun->machine->function_return_type |
| 17472 | != indirect_branch_thunk_inline) |
| 17473 | { |
| 17474 | bool need_thunk = (cfun->machine->function_return_type |
| 17475 | == indirect_branch_thunk); |
| 17476 | indirect_thunk_name (name: thunk_name, INVALID_REGNUM, need_prefix, |
| 17477 | ret_p: true); |
| 17478 | indirect_return_needed |= need_thunk; |
| 17479 | fprintf (stream: asm_out_file, format: "\tjmp\t" ); |
| 17480 | assemble_name (asm_out_file, thunk_name); |
| 17481 | putc (c: '\n', stream: asm_out_file); |
| 17482 | } |
| 17483 | else |
| 17484 | output_indirect_thunk (INVALID_REGNUM); |
| 17485 | |
| 17486 | return "" ; |
| 17487 | } |
| 17488 | |
| 17489 | output_asm_insn (long_p ? "rep%; ret" : "ret" , nullptr); |
| 17490 | return (ix86_harden_sls & harden_sls_return) ? "int3" : "" ; |
| 17491 | } |
| 17492 | |
| 17493 | /* Output indirect function return. RET_OP is the function return |
| 17494 | target. */ |
| 17495 | |
| 17496 | const char * |
| 17497 | ix86_output_indirect_function_return (rtx ret_op) |
| 17498 | { |
| 17499 | if (cfun->machine->function_return_type != indirect_branch_keep) |
| 17500 | { |
| 17501 | char thunk_name[32]; |
| 17502 | enum indirect_thunk_prefix need_prefix |
| 17503 | = indirect_thunk_need_prefix (insn: current_output_insn); |
| 17504 | unsigned int regno = REGNO (ret_op); |
| 17505 | gcc_assert (regno == CX_REG); |
| 17506 | |
| 17507 | if (cfun->machine->function_return_type |
| 17508 | != indirect_branch_thunk_inline) |
| 17509 | { |
| 17510 | bool need_thunk = (cfun->machine->function_return_type |
| 17511 | == indirect_branch_thunk); |
| 17512 | indirect_thunk_name (name: thunk_name, regno, need_prefix, ret_p: true); |
| 17513 | |
| 17514 | if (need_thunk) |
| 17515 | { |
| 17516 | indirect_return_via_cx = true; |
| 17517 | SET_HARD_REG_BIT (set&: indirect_thunks_used, CX_REG); |
| 17518 | } |
| 17519 | fprintf (stream: asm_out_file, format: "\tjmp\t" ); |
| 17520 | assemble_name (asm_out_file, thunk_name); |
| 17521 | putc (c: '\n', stream: asm_out_file); |
| 17522 | } |
| 17523 | else |
| 17524 | output_indirect_thunk (regno); |
| 17525 | } |
| 17526 | else |
| 17527 | { |
| 17528 | output_asm_insn ("%!jmp\t%A0" , &ret_op); |
| 17529 | if (ix86_harden_sls & harden_sls_indirect_jmp) |
| 17530 | fputs (s: "\tint3\n" , stream: asm_out_file); |
| 17531 | } |
| 17532 | return "" ; |
| 17533 | } |
| 17534 | |
/* Output the assembly for a call instruction.  INSN is the call insn,
   CALL_OP its target.  Returns a trailing template to emit after the
   call proper: "int3" (SLS hardening on an indirect sibcall), "nop"
   (SEH unwinder padding), or "".  */

const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  /* True when the target is a compile-time constant address.  */
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  /* True when indirect branches must be rewritten via thunks; SEH
     cannot use thunks because its epilogue detection needs the raw
     branch form.  */
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  /* With (no-PIC) "noplt", turn the direct call into an
	     indirect jump through the GOT slot.  */
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
		}
	    }
	  else
	    xasm = "%!jmp\t%P0";
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0";
      else
	{
	  /* For the thunk path the template is just the operand; the
	     thunk machinery adds the branch mnemonic itself.  */
	  if (output_indirect_p)
	    xasm = "%0";
	  else
	    xasm = "%!jmp\t%A0";
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, sibcall_p: true);
      else
	{
	  output_asm_insn (xasm, &call_op);
	  /* Harden the indirect jmp against straight-line speculation.  */
	  if (!direct_p
	      && (ix86_harden_sls & harden_sls_indirect_jmp))
	    return "int3";
	}
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (insn: i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  /* Non-sibling call: same template selection as above, but with
     "call" instead of "jmp".  */
  if (direct_p)
    {
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
	    }
	}
      else
	xasm = "%!call\t%P0";
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0";
      else
	xasm = "%!call\t%A0";
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, sibcall_p: false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
| 17678 | |
| 17679 | /* Return a MEM corresponding to a stack slot with mode MODE. |
| 17680 | Allocate a new slot if necessary. |
| 17681 | |
| 17682 | The RTL for a function can have several slots available: N is |
| 17683 | which slot to use. */ |
| 17684 | |
| 17685 | rtx |
| 17686 | assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) |
| 17687 | { |
| 17688 | struct stack_local_entry *s; |
| 17689 | |
| 17690 | gcc_assert (n < MAX_386_STACK_LOCALS); |
| 17691 | |
| 17692 | for (s = ix86_stack_locals; s; s = s->next) |
| 17693 | if (s->mode == mode && s->n == n) |
| 17694 | return validize_mem (copy_rtx (s->rtl)); |
| 17695 | |
| 17696 | int align = 0; |
| 17697 | /* For DImode with SLOT_FLOATxFDI_387 use 32-bit |
| 17698 | alignment with -m32 -mpreferred-stack-boundary=2. */ |
| 17699 | if (mode == DImode |
| 17700 | && !TARGET_64BIT |
| 17701 | && n == SLOT_FLOATxFDI_387 |
| 17702 | && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode)) |
| 17703 | align = 32; |
| 17704 | s = ggc_alloc<stack_local_entry> (); |
| 17705 | s->n = n; |
| 17706 | s->mode = mode; |
| 17707 | s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align); |
| 17708 | |
| 17709 | s->next = ix86_stack_locals; |
| 17710 | ix86_stack_locals = s; |
| 17711 | return validize_mem (copy_rtx (s->rtl)); |
| 17712 | } |
| 17713 | |
| 17714 | static void |
| 17715 | ix86_instantiate_decls (void) |
| 17716 | { |
| 17717 | struct stack_local_entry *s; |
| 17718 | |
| 17719 | for (s = ix86_stack_locals; s; s = s->next) |
| 17720 | if (s->rtl != NULL_RTX) |
| 17721 | instantiate_decl_rtl (x: s->rtl); |
| 17722 | } |
| 17723 | |
| 17724 | /* Check whether x86 address PARTS is a pc-relative address. */ |
| 17725 | |
| 17726 | bool |
| 17727 | ix86_rip_relative_addr_p (struct ix86_address *parts) |
| 17728 | { |
| 17729 | rtx base, index, disp; |
| 17730 | |
| 17731 | base = parts->base; |
| 17732 | index = parts->index; |
| 17733 | disp = parts->disp; |
| 17734 | |
| 17735 | if (disp && !base && !index) |
| 17736 | { |
| 17737 | if (TARGET_64BIT) |
| 17738 | { |
| 17739 | rtx symbol = disp; |
| 17740 | |
| 17741 | if (GET_CODE (disp) == CONST) |
| 17742 | symbol = XEXP (disp, 0); |
| 17743 | if (GET_CODE (symbol) == PLUS |
| 17744 | && CONST_INT_P (XEXP (symbol, 1))) |
| 17745 | symbol = XEXP (symbol, 0); |
| 17746 | |
| 17747 | if (LABEL_REF_P (symbol) |
| 17748 | || (SYMBOL_REF_P (symbol) |
| 17749 | && SYMBOL_REF_TLS_MODEL (symbol) == 0) |
| 17750 | || (GET_CODE (symbol) == UNSPEC |
| 17751 | && (XINT (symbol, 1) == UNSPEC_GOTPCREL |
| 17752 | || XINT (symbol, 1) == UNSPEC_PCREL |
| 17753 | || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) |
| 17754 | return true; |
| 17755 | } |
| 17756 | } |
| 17757 | return false; |
| 17758 | } |
| 17759 | |
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.
   ADDR is the address rtx; LEA is true when it is an LEA operand.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-modify addresses have fixed encodings; report no extra
     address bytes for them.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, out: &parts);
  gcc_assert (ok);

  /* One byte for a segment-override prefix, if any.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Look through paradoxical/narrowing SUBREGs of registers.  */
  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      /* disp32 plus (in 64-bit, non-rip-relative) a SIB byte.  */
      len += 4;
      if (!ix86_rip_relative_addr_p (parts: &parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* disp8 when the displacement fits a signed byte (and a base
	     register is present), disp32 otherwise.  */
	  if (base && satisfies_constraint_K (op: disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
| 17859 | |
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  Returns the
   number of immediate bytes in INSN's encoding (0 when it has no
   constant operand).  */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	/* At most one immediate operand is expected.  */
	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    /* Truncate to the operand's mode before the signed-byte
	       range test, so e.g. 0xffff in HImode counts as -1.  */
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
| 17919 | |
/* Compute default value for "length_address" attribute: the number of
   address bytes contributed by INSN's (first non-ignored) memory
   operand, or by the address of an LEA.  Returns 0 when there is no
   memory operand.  */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      /* An LEA pattern may be wrapped in a PARALLEL (e.g. with a
	 clobber); the SET is its first element.  */
      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, lea: true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      /* Advance to this operand's constraint string for the
		 matched alternative.  */
	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), lea: false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}
| 17973 | |
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  HAS_0F_OPCODE is true for
   insns in the 0f opcode map (the only map encodable with the 2-byte
   VEX form); HAS_VEX_W is true when VEX.W must be set.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  /* Assume the 2-byte form until an operand forces 3 bytes.  */
  int i, reg_only = 2 + 1;
  bool has_mem = false;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;

	/* REX.B bit requires 3-byte VEX.  Right here we don't know which
	   operand will be encoded using VEX.B, so be conservative.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (REX_INT_REGNO_P (recog_data.operand[i])
	    || REX2_INT_REGNO_P (recog_data.operand[i])
	    || REX_SSE_REGNO_P (recog_data.operand[i]))
	  reg_only = 3 + 1;
      }
    else if (MEM_P (recog_data.operand[i]))
      {
	/* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
	if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
	  return 4;

	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;

	/* A memory operand with only legacy registers still allows
	   the 2-byte form.  */
	has_mem = true;
      }

  return has_mem ? 2 + 1 : reg_only;
}
| 18027 | |
| 18028 | |
| 18029 | static bool |
| 18030 | ix86_class_likely_spilled_p (reg_class_t); |
| 18031 | |
| 18032 | /* Returns true if lhs of insn is HW function argument register and set up |
| 18033 | is_spilled to true if it is likely spilled HW register. */ |
| 18034 | static bool |
| 18035 | insn_is_function_arg (rtx insn, bool* is_spilled) |
| 18036 | { |
| 18037 | rtx dst; |
| 18038 | |
| 18039 | if (!NONDEBUG_INSN_P (insn)) |
| 18040 | return false; |
| 18041 | /* Call instructions are not movable, ignore it. */ |
| 18042 | if (CALL_P (insn)) |
| 18043 | return false; |
| 18044 | insn = PATTERN (insn); |
| 18045 | if (GET_CODE (insn) == PARALLEL) |
| 18046 | insn = XVECEXP (insn, 0, 0); |
| 18047 | if (GET_CODE (insn) != SET) |
| 18048 | return false; |
| 18049 | dst = SET_DEST (insn); |
| 18050 | if (REG_P (dst) && HARD_REGISTER_P (dst) |
| 18051 | && ix86_function_arg_regno_p (REGNO (dst))) |
| 18052 | { |
| 18053 | /* Is it likely spilled HW register? */ |
| 18054 | if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) |
| 18055 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) |
| 18056 | *is_spilled = true; |
| 18057 | return true; |
| 18058 | } |
| 18059 | return false; |
| 18060 | } |
| 18061 | |
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.  CALL is the
   call insn, HEAD the first insn of the scheduling region.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (insn: head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (insn: last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (insn: last, is_spilled: &is_spilled))
	break;
      /* The insn directly before the call is not an argument set-up:
	 there is no adjacent argument chain to protect.  */
      return NULL;
    }

  /* Walk backwards over the contiguous run of argument moves,
     chaining output dependencies when a likely-spilled register has
     been seen.  */
  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (insn: last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, is_spilled: &is_spilled))
	{
	  /* Add output depdendence between two function arguments if chain
	     of output arguments contains likely spilled HW registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  /* Without a likely-spilled register there is nothing to protect.  */
  if (!is_spilled)
    return NULL;
  return first_arg;
}
| 18116 | |
| 18117 | /* Add output or anti dependency from insn to first_arg to restrict its code |
| 18118 | motion. */ |
| 18119 | static void |
| 18120 | avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) |
| 18121 | { |
| 18122 | rtx set; |
| 18123 | rtx tmp; |
| 18124 | |
| 18125 | set = single_set (insn); |
| 18126 | if (!set) |
| 18127 | return; |
| 18128 | tmp = SET_DEST (set); |
| 18129 | if (REG_P (tmp)) |
| 18130 | { |
| 18131 | /* Add output dependency to the first function argument. */ |
| 18132 | add_dependence (first_arg, insn, REG_DEP_OUTPUT); |
| 18133 | return; |
| 18134 | } |
| 18135 | /* Add anti dependency. */ |
| 18136 | add_dependence (first_arg, insn, REG_DEP_ANTI); |
| 18137 | } |
| 18138 | |
| 18139 | /* Avoid cross block motion of function argument through adding dependency |
| 18140 | from the first non-jump instruction in bb. */ |
| 18141 | static void |
| 18142 | add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) |
| 18143 | { |
| 18144 | rtx_insn *insn = BB_END (bb); |
| 18145 | |
| 18146 | while (insn) |
| 18147 | { |
| 18148 | if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) |
| 18149 | { |
| 18150 | rtx set = single_set (insn); |
| 18151 | if (set) |
| 18152 | { |
| 18153 | avoid_func_arg_motion (first_arg: arg, insn); |
| 18154 | return; |
| 18155 | } |
| 18156 | } |
| 18157 | if (insn == BB_HEAD (bb)) |
| 18158 | return; |
| 18159 | insn = PREV_INSN (insn); |
| 18160 | } |
| 18161 | } |
| 18162 | |
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  HEAD and TAIL delimit the
   insns of the current scheduling region.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  /* Only relevant before register allocation.  */
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (insn: head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (call: insn, head);
	if (first_arg)
	  {
	    /* Add dependee for first argument to predecessors if only
	       region contains more than one block.  */
	    basic_block bb =  BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating of loop-carried dependencies through
		       using topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (arg: first_arg, bb: e->src);
		  }
	      }
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      /* Keep unrelated insns from drifting between the argument chain
	 and its call.  */
      avoid_func_arg_motion (first_arg, insn);
}
| 18213 | |
| 18214 | /* Hook for pre-reload schedule - set priority of moves from likely spilled |
| 18215 | HW registers to maximum, to schedule them at soon as possible. These are |
| 18216 | moves from function argument registers at the top of the function entry |
| 18217 | and moves from function return value registers after call. */ |
| 18218 | static int |
| 18219 | ix86_adjust_priority (rtx_insn *insn, int priority) |
| 18220 | { |
| 18221 | rtx set; |
| 18222 | |
| 18223 | if (reload_completed) |
| 18224 | return priority; |
| 18225 | |
| 18226 | if (!NONDEBUG_INSN_P (insn)) |
| 18227 | return priority; |
| 18228 | |
| 18229 | set = single_set (insn); |
| 18230 | if (set) |
| 18231 | { |
| 18232 | rtx tmp = SET_SRC (set); |
| 18233 | if (REG_P (tmp) |
| 18234 | && HARD_REGISTER_P (tmp) |
| 18235 | && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) |
| 18236 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) |
| 18237 | return current_sched_info->sched_max_insns_priority; |
| 18238 | } |
| 18239 | |
| 18240 | return priority; |
| 18241 | } |
| 18242 | |
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_TREMONT:
    case PROCESSOR_ALDERLAKE:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for pre-reload schedule
	 to save compile time.  */
      if (reload_completed)
	{
	  ix86_core2i7_init_hooks ();
	  break;
	}
      /* Fall through.  */
    default:
      /* Clear the multipass hooks so other tunings do not pay for
	 them in the scheduler's hot paths.  */
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
| 18278 | |
| 18279 | |
| 18280 | /* Implement TARGET_STATIC_RTX_ALIGNMENT. */ |
| 18281 | |
| 18282 | static HOST_WIDE_INT |
| 18283 | ix86_static_rtx_alignment (machine_mode mode) |
| 18284 | { |
| 18285 | if (mode == DFmode) |
| 18286 | return 64; |
| 18287 | if (ALIGN_MODE_128 (mode)) |
| 18288 | return MAX (128, GET_MODE_ALIGNMENT (mode)); |
| 18289 | return GET_MODE_ALIGNMENT (mode); |
| 18290 | } |
| 18291 | |
| 18292 | /* Implement TARGET_CONSTANT_ALIGNMENT. */ |
| 18293 | |
| 18294 | static HOST_WIDE_INT |
| 18295 | ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) |
| 18296 | { |
| 18297 | if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST |
| 18298 | || TREE_CODE (exp) == INTEGER_CST) |
| 18299 | { |
| 18300 | machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); |
| 18301 | HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); |
| 18302 | return MAX (mode_align, align); |
| 18303 | } |
| 18304 | else if (!optimize_size && TREE_CODE (exp) == STRING_CST |
| 18305 | && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) |
| 18306 | return BITS_PER_WORD; |
| 18307 | |
| 18308 | return align; |
| 18309 | } |
| 18310 | |
| 18311 | /* Implement TARGET_EMPTY_RECORD_P. */ |
| 18312 | |
| 18313 | static bool |
| 18314 | ix86_is_empty_record (const_tree type) |
| 18315 | { |
| 18316 | if (!TARGET_64BIT) |
| 18317 | return false; |
| 18318 | return default_is_empty_record (type); |
| 18319 | } |
| 18320 | |
| 18321 | /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */ |
| 18322 | |
| 18323 | static void |
| 18324 | ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) |
| 18325 | { |
| 18326 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
| 18327 | |
| 18328 | if (!cum->warn_empty) |
| 18329 | return; |
| 18330 | |
| 18331 | if (!TYPE_EMPTY_P (type)) |
| 18332 | return; |
| 18333 | |
| 18334 | /* Don't warn if the function isn't visible outside of the TU. */ |
| 18335 | if (cum->decl && !TREE_PUBLIC (cum->decl)) |
| 18336 | return; |
| 18337 | |
| 18338 | tree decl = cum->decl; |
| 18339 | if (!decl) |
| 18340 | /* If we don't know the target, look at the current TU. */ |
| 18341 | decl = current_function_decl; |
| 18342 | |
| 18343 | const_tree ctx = get_ultimate_context (decl); |
| 18344 | if (ctx == NULL_TREE |
| 18345 | || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) |
| 18346 | return; |
| 18347 | |
| 18348 | /* If the actual size of the type is zero, then there is no change |
| 18349 | in how objects of this size are passed. */ |
| 18350 | if (int_size_in_bytes (type) == 0) |
| 18351 | return; |
| 18352 | |
| 18353 | warning (OPT_Wabi, "empty class %qT parameter passing ABI " |
| 18354 | "changes in %<-fabi-version=12%> (GCC 8)" , type); |
| 18355 | |
| 18356 | /* Only warn once. */ |
| 18357 | cum->warn_empty = false; |
| 18358 | } |
| 18359 | |
| 18360 | /* This hook returns name of multilib ABI. */ |
| 18361 | |
| 18362 | static const char * |
| 18363 | ix86_get_multilib_abi_name (void) |
| 18364 | { |
| 18365 | if (!(TARGET_64BIT_P (ix86_isa_flags))) |
| 18366 | return "i386" ; |
| 18367 | else if (TARGET_X32_P (ix86_isa_flags)) |
| 18368 | return "x32" ; |
| 18369 | else |
| 18370 | return "x86_64" ; |
| 18371 | } |
| 18372 | |
| 18373 | /* Compute the alignment for a variable for Intel MCU psABI. TYPE is |
| 18374 | the data type, and ALIGN is the alignment that the object would |
| 18375 | ordinarily have. */ |
| 18376 | |
| 18377 | static int |
| 18378 | iamcu_alignment (tree type, int align) |
| 18379 | { |
| 18380 | machine_mode mode; |
| 18381 | |
| 18382 | if (align < 32 || TYPE_USER_ALIGN (type)) |
| 18383 | return align; |
| 18384 | |
| 18385 | /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 |
| 18386 | bytes. */ |
| 18387 | type = strip_array_types (type); |
| 18388 | if (TYPE_ATOMIC (type)) |
| 18389 | return align; |
| 18390 | |
| 18391 | mode = TYPE_MODE (type); |
| 18392 | switch (GET_MODE_CLASS (mode)) |
| 18393 | { |
| 18394 | case MODE_INT: |
| 18395 | case MODE_COMPLEX_INT: |
| 18396 | case MODE_COMPLEX_FLOAT: |
| 18397 | case MODE_FLOAT: |
| 18398 | case MODE_DECIMAL_FLOAT: |
| 18399 | return 32; |
| 18400 | default: |
| 18401 | return align; |
| 18402 | } |
| 18403 | } |
| 18404 | |
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  OPT is true when the
   alignment may be raised as an optimization; when OPT is false only
   ABI-mandated adjustments are applied (see the !opt early return and
   the ix86_align_data_type_abi case below).  */

int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  /* -malign-data= selects how aggressive the optional increases are:
     "abi" disables them entirely, "compat" caps them at one word,
     "cacheline" keeps the cache-line cap computed above.  */
  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  /* The Intel MCU psABI caps scalar alignment; see iamcu_alignment.  */
  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  /* Optionally raise alignment of aggregates of known constant size:
     first to the GCC 4.8 compatibility bound, then to the cache-line
     bound, each only when the type is at least that large.  */
  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  When OPT is set this is extended to all
     aggregates as an optimization.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: 128)
	  && align < 128)
	return 128;
    }

  /* Everything below is an optional optimization.  */
  if (!opt)
    return align;

  /* Raise alignment of DFmode and 128-bit data; the per-tree-code
     checks parallel those in ix86_local_alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is inspected here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
| 18505 | |
| 18506 | /* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */ |
| 18507 | static void |
| 18508 | ix86_lower_local_decl_alignment (tree decl) |
| 18509 | { |
| 18510 | unsigned int new_align = ix86_local_alignment (decl, VOIDmode, |
| 18511 | DECL_ALIGN (decl), true); |
| 18512 | if (new_align < DECL_ALIGN (decl)) |
| 18513 | SET_DECL_ALIGN (decl, new_align); |
| 18514 | } |
| 18515 | |
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  MAY_LOWER is true when the result is allowed to be smaller
   than ALIGN (see ix86_lower_local_decl_alignment).  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align, bool may_lower)
{
  tree type, decl;

  /* EXP may be a decl or a bare type; split it into both views.  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  User- or atomic-aligned objects
     keep their 64-bit alignment.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: 128)
	  && align < 128)
	return 128;
    }

  /* Raise alignment of DFmode and 128-bit data; the per-tree-code
     checks parallel those in ix86_data_alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is inspected here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
| 18629 | |
| 18630 | /* Compute the minimum required alignment for dynamic stack realignment |
| 18631 | purposes for a local variable, parameter or a stack slot. EXP is |
| 18632 | the data type or decl itself, MODE is its mode and ALIGN is the |
| 18633 | alignment that the object would ordinarily have. */ |
| 18634 | |
| 18635 | unsigned int |
| 18636 | ix86_minimum_alignment (tree exp, machine_mode mode, |
| 18637 | unsigned int align) |
| 18638 | { |
| 18639 | tree type, decl; |
| 18640 | |
| 18641 | if (exp && DECL_P (exp)) |
| 18642 | { |
| 18643 | type = TREE_TYPE (exp); |
| 18644 | decl = exp; |
| 18645 | } |
| 18646 | else |
| 18647 | { |
| 18648 | type = exp; |
| 18649 | decl = NULL; |
| 18650 | } |
| 18651 | |
| 18652 | if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) |
| 18653 | return align; |
| 18654 | |
| 18655 | /* Don't do dynamic stack realignment for long long objects with |
| 18656 | -mpreferred-stack-boundary=2. */ |
| 18657 | if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) |
| 18658 | && (!type || (!TYPE_USER_ALIGN (type) |
| 18659 | && !TYPE_ATOMIC (strip_array_types (type)))) |
| 18660 | && (!decl || !DECL_USER_ALIGN (decl))) |
| 18661 | { |
| 18662 | gcc_checking_assert (!TARGET_STV); |
| 18663 | return 32; |
| 18664 | } |
| 18665 | |
| 18666 | return align; |
| 18667 | } |
| 18668 | |
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.
   FNDECL_OR_TYPE identifies the nested function (decl or type).
   INCOMING_P is true when asking for the location the nested function
   itself reads the chain from, rather than where the caller stores it
   (the two differ only in the regparm-3 stack case below).  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      /* Split FNDECL_OR_TYPE into a type and (possibly null) decl.  */
      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      /* The calling convention may claim ECX for arguments, in which
	 case another register has to carry the chain.  */
      ccvt = ix86_get_callcvt (type: fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (type: fntype, decl: fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      /* Record that this function's prologue must account for
		 the chain being on the stack; only legal before reload.  */
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      /* The chain sits just below the return address, i.e. at
		 arg_pointer - 8 (return address and pushed ESI).  */
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
| 18745 | |
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  All instruction bytes are
   written as little-endian immediates of the matching width.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;        /* Running byte offset into the trampoline.  */
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
	{
	  /* Insert ENDBR64 (f3 0f 1e fa).  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
	  offset += 4;
	}

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  /* 41 bb <imm32>: movl $fnaddr, %r11d.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb <imm64>: movabs $fnaddr, %r11.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  /* 41 ba <imm32>: movl $chain, %r10d.  */
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  /* 49 ba <imm64>: movabs $chain, %r10.  */
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.
	 Bytes: 49 ff e3 (rex.WB jmp *%r11) 90 (nop).  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl_or_type: fndecl, incoming_p: true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      /* b8 <imm32>: movl $chain, %eax.  */
	      opcode = 0xb8; break;
	    case CX_REG:
	      /* b9 <imm32>: movl $chain, %ecx.  */
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	/* 68 <imm32>: pushl $chain.  */
	opcode = 0x68;

      if (need_endbr)
	{
	  /* Insert ENDBR32 (f3 0f 1e fb).  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
	  offset += 4;
	}

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      /* e9 <rel32>: jmp to the target function.  */
      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
	  && !cgraph_node::get (decl: fndecl)->only_called_directly_p ())
	skip += 4;
      /* rel32 = target - (address just past the jmp), adjusted by SKIP.  */
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - skip),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  /* Some targets must mark the trampoline's stack page executable.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
| 18897 | |
| 18898 | static bool |
| 18899 | ix86_allocate_stack_slots_for_args (void) |
| 18900 | { |
| 18901 | /* Naked functions should not allocate stack slots for arguments. */ |
| 18902 | return !ix86_function_naked (fn: current_function_decl); |
| 18903 | } |
| 18904 | |
| 18905 | static bool |
| 18906 | ix86_warn_func_return (tree decl) |
| 18907 | { |
| 18908 | /* Naked functions are implemented entirely in assembly, including the |
| 18909 | return sequence, so suppress warnings about this. */ |
| 18910 | return !ix86_function_naked (fn: decl); |
| 18911 | } |
| 18912 | |
| 18913 | /* Return the shift count of a vector by scalar shift builtin second argument |
| 18914 | ARG1. */ |
| 18915 | static tree |
| 18916 | ix86_vector_shift_count (tree arg1) |
| 18917 | { |
| 18918 | if (tree_fits_uhwi_p (arg1)) |
| 18919 | return arg1; |
| 18920 | else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8) |
| 18921 | { |
| 18922 | /* The count argument is weird, passed in as various 128-bit |
| 18923 | (or 64-bit) vectors, the low 64 bits from it are the count. */ |
| 18924 | unsigned char buf[16]; |
| 18925 | int len = native_encode_expr (arg1, buf, 16); |
| 18926 | if (len == 0) |
| 18927 | return NULL_TREE; |
| 18928 | tree t = native_interpret_expr (uint64_type_node, buf, len); |
| 18929 | if (t && tree_fits_uhwi_p (t)) |
| 18930 | return t; |
| 18931 | } |
| 18932 | return NULL_TREE; |
| 18933 | } |
| 18934 | |
| 18935 | /* Return true if arg_mask is all ones, ELEMS is elements number of |
| 18936 | corresponding vector. */ |
| 18937 | static bool |
| 18938 | ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask) |
| 18939 | { |
| 18940 | if (TREE_CODE (arg_mask) != INTEGER_CST) |
| 18941 | return false; |
| 18942 | |
| 18943 | unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask); |
| 18944 | if (elems == HOST_BITS_PER_WIDE_INT) |
| 18945 | return mask == HOST_WIDE_INT_M1U; |
| 18946 | if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) |
| 18947 | return false; |
| 18948 | |
| 18949 | return true; |
| 18950 | } |
| 18951 | |
| 18952 | static tree |
| 18953 | ix86_fold_builtin (tree fndecl, int n_args, |
| 18954 | tree *args, bool ignore ATTRIBUTE_UNUSED) |
| 18955 | { |
| 18956 | if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) |
| 18957 | { |
| 18958 | enum ix86_builtins fn_code |
| 18959 | = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl); |
| 18960 | enum rtx_code rcode; |
| 18961 | bool is_vshift; |
| 18962 | enum tree_code tcode; |
| 18963 | bool is_scalar; |
| 18964 | unsigned HOST_WIDE_INT mask; |
| 18965 | |
| 18966 | switch (fn_code) |
| 18967 | { |
| 18968 | case IX86_BUILTIN_CPU_IS: |
| 18969 | case IX86_BUILTIN_CPU_SUPPORTS: |
| 18970 | gcc_assert (n_args == 1); |
| 18971 | return fold_builtin_cpu (fndecl, args); |
| 18972 | |
| 18973 | case IX86_BUILTIN_NANQ: |
| 18974 | case IX86_BUILTIN_NANSQ: |
| 18975 | { |
| 18976 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); |
| 18977 | const char *str = c_getstr (*args); |
| 18978 | int quiet = fn_code == IX86_BUILTIN_NANQ; |
| 18979 | REAL_VALUE_TYPE real; |
| 18980 | |
| 18981 | if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) |
| 18982 | return build_real (type, real); |
| 18983 | return NULL_TREE; |
| 18984 | } |
| 18985 | |
| 18986 | case IX86_BUILTIN_INFQ: |
| 18987 | case IX86_BUILTIN_HUGE_VALQ: |
| 18988 | { |
| 18989 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); |
| 18990 | REAL_VALUE_TYPE inf; |
| 18991 | real_inf (&inf); |
| 18992 | return build_real (type, inf); |
| 18993 | } |
| 18994 | |
| 18995 | case IX86_BUILTIN_TZCNT16: |
| 18996 | case IX86_BUILTIN_CTZS: |
| 18997 | case IX86_BUILTIN_TZCNT32: |
| 18998 | case IX86_BUILTIN_TZCNT64: |
| 18999 | gcc_assert (n_args == 1); |
| 19000 | if (TREE_CODE (args[0]) == INTEGER_CST) |
| 19001 | { |
| 19002 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); |
| 19003 | tree arg = args[0]; |
| 19004 | if (fn_code == IX86_BUILTIN_TZCNT16 |
| 19005 | || fn_code == IX86_BUILTIN_CTZS) |
| 19006 | arg = fold_convert (short_unsigned_type_node, arg); |
| 19007 | if (integer_zerop (arg)) |
| 19008 | return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); |
| 19009 | else |
| 19010 | return fold_const_call (CFN_CTZ, type, arg); |
| 19011 | } |
| 19012 | break; |
| 19013 | |
| 19014 | case IX86_BUILTIN_LZCNT16: |
| 19015 | case IX86_BUILTIN_CLZS: |
| 19016 | case IX86_BUILTIN_LZCNT32: |
| 19017 | case IX86_BUILTIN_LZCNT64: |
| 19018 | gcc_assert (n_args == 1); |
| 19019 | if (TREE_CODE (args[0]) == INTEGER_CST) |
| 19020 | { |
| 19021 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); |
| 19022 | tree arg = args[0]; |
| 19023 | if (fn_code == IX86_BUILTIN_LZCNT16 |
| 19024 | || fn_code == IX86_BUILTIN_CLZS) |
| 19025 | arg = fold_convert (short_unsigned_type_node, arg); |
| 19026 | if (integer_zerop (arg)) |
| 19027 | return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); |
| 19028 | else |
| 19029 | return fold_const_call (CFN_CLZ, type, arg); |
| 19030 | } |
| 19031 | break; |
| 19032 | |
| 19033 | case IX86_BUILTIN_BEXTR32: |
| 19034 | case IX86_BUILTIN_BEXTR64: |
| 19035 | case IX86_BUILTIN_BEXTRI32: |
| 19036 | case IX86_BUILTIN_BEXTRI64: |
| 19037 | gcc_assert (n_args == 2); |
| 19038 | if (tree_fits_uhwi_p (args[1])) |
| 19039 | { |
| 19040 | unsigned HOST_WIDE_INT res = 0; |
| 19041 | unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0])); |
| 19042 | unsigned int start = tree_to_uhwi (args[1]); |
| 19043 | unsigned int len = (start & 0xff00) >> 8; |
| 19044 | tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl)); |
| 19045 | start &= 0xff; |
| 19046 | if (start >= prec || len == 0) |
| 19047 | return omit_one_operand (lhs_type, build_zero_cst (lhs_type), |
| 19048 | args[0]); |
| 19049 | else if (!tree_fits_uhwi_p (args[0])) |
| 19050 | break; |
| 19051 | else |
| 19052 | res = tree_to_uhwi (args[0]) >> start; |
| 19053 | if (len > prec) |
| 19054 | len = prec; |
| 19055 | if (len < HOST_BITS_PER_WIDE_INT) |
| 19056 | res &= (HOST_WIDE_INT_1U << len) - 1; |
| 19057 | return build_int_cstu (type: lhs_type, res); |
| 19058 | } |
| 19059 | break; |
| 19060 | |
| 19061 | case IX86_BUILTIN_BZHI32: |
| 19062 | case IX86_BUILTIN_BZHI64: |
| 19063 | gcc_assert (n_args == 2); |
| 19064 | if (tree_fits_uhwi_p (args[1])) |
| 19065 | { |
| 19066 | unsigned int idx = tree_to_uhwi (args[1]) & 0xff; |
| 19067 | tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl)); |
| 19068 | if (idx >= TYPE_PRECISION (TREE_TYPE (args[0]))) |
| 19069 | return args[0]; |
| 19070 | if (idx == 0) |
| 19071 | return omit_one_operand (lhs_type, build_zero_cst (lhs_type), |
| 19072 | args[0]); |
| 19073 | if (!tree_fits_uhwi_p (args[0])) |
| 19074 | break; |
| 19075 | unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]); |
| 19076 | res &= ~(HOST_WIDE_INT_M1U << idx); |
| 19077 | return build_int_cstu (type: lhs_type, res); |
| 19078 | } |
| 19079 | break; |
| 19080 | |
| 19081 | case IX86_BUILTIN_PDEP32: |
| 19082 | case IX86_BUILTIN_PDEP64: |
| 19083 | gcc_assert (n_args == 2); |
| 19084 | if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) |
| 19085 | { |
| 19086 | unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); |
| 19087 | unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); |
| 19088 | unsigned HOST_WIDE_INT res = 0; |
| 19089 | unsigned HOST_WIDE_INT m, k = 1; |
| 19090 | for (m = 1; m; m <<= 1) |
| 19091 | if ((mask & m) != 0) |
| 19092 | { |
| 19093 | if ((src & k) != 0) |
| 19094 | res |= m; |
| 19095 | k <<= 1; |
| 19096 | } |
| 19097 | return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); |
| 19098 | } |
| 19099 | break; |
| 19100 | |
| 19101 | case IX86_BUILTIN_PEXT32: |
| 19102 | case IX86_BUILTIN_PEXT64: |
| 19103 | gcc_assert (n_args == 2); |
| 19104 | if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) |
| 19105 | { |
| 19106 | unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); |
| 19107 | unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); |
| 19108 | unsigned HOST_WIDE_INT res = 0; |
| 19109 | unsigned HOST_WIDE_INT m, k = 1; |
| 19110 | for (m = 1; m; m <<= 1) |
| 19111 | if ((mask & m) != 0) |
| 19112 | { |
| 19113 | if ((src & m) != 0) |
| 19114 | res |= k; |
| 19115 | k <<= 1; |
| 19116 | } |
| 19117 | return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); |
| 19118 | } |
| 19119 | break; |
| 19120 | |
| 19121 | case IX86_BUILTIN_MOVMSKPS: |
| 19122 | case IX86_BUILTIN_PMOVMSKB: |
| 19123 | case IX86_BUILTIN_MOVMSKPD: |
| 19124 | case IX86_BUILTIN_PMOVMSKB128: |
| 19125 | case IX86_BUILTIN_MOVMSKPD256: |
| 19126 | case IX86_BUILTIN_MOVMSKPS256: |
| 19127 | case IX86_BUILTIN_PMOVMSKB256: |
| 19128 | gcc_assert (n_args == 1); |
| 19129 | if (TREE_CODE (args[0]) == VECTOR_CST) |
| 19130 | { |
| 19131 | HOST_WIDE_INT res = 0; |
| 19132 | for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i) |
| 19133 | { |
| 19134 | tree e = VECTOR_CST_ELT (args[0], i); |
| 19135 | if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e)) |
| 19136 | { |
| 19137 | if (wi::neg_p (x: wi::to_wide (t: e))) |
| 19138 | res |= HOST_WIDE_INT_1 << i; |
| 19139 | } |
| 19140 | else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e)) |
| 19141 | { |
| 19142 | if (TREE_REAL_CST (e).sign) |
| 19143 | res |= HOST_WIDE_INT_1 << i; |
| 19144 | } |
| 19145 | else |
| 19146 | return NULL_TREE; |
| 19147 | } |
| 19148 | return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res); |
| 19149 | } |
| 19150 | break; |
| 19151 | |
| 19152 | case IX86_BUILTIN_PSLLD: |
| 19153 | case IX86_BUILTIN_PSLLD128: |
| 19154 | case IX86_BUILTIN_PSLLD128_MASK: |
| 19155 | case IX86_BUILTIN_PSLLD256: |
| 19156 | case IX86_BUILTIN_PSLLD256_MASK: |
| 19157 | case IX86_BUILTIN_PSLLD512: |
| 19158 | case IX86_BUILTIN_PSLLDI: |
| 19159 | case IX86_BUILTIN_PSLLDI128: |
| 19160 | case IX86_BUILTIN_PSLLDI128_MASK: |
| 19161 | case IX86_BUILTIN_PSLLDI256: |
| 19162 | case IX86_BUILTIN_PSLLDI256_MASK: |
| 19163 | case IX86_BUILTIN_PSLLDI512: |
| 19164 | case IX86_BUILTIN_PSLLQ: |
| 19165 | case IX86_BUILTIN_PSLLQ128: |
| 19166 | case IX86_BUILTIN_PSLLQ128_MASK: |
| 19167 | case IX86_BUILTIN_PSLLQ256: |
| 19168 | case IX86_BUILTIN_PSLLQ256_MASK: |
| 19169 | case IX86_BUILTIN_PSLLQ512: |
| 19170 | case IX86_BUILTIN_PSLLQI: |
| 19171 | case IX86_BUILTIN_PSLLQI128: |
| 19172 | case IX86_BUILTIN_PSLLQI128_MASK: |
| 19173 | case IX86_BUILTIN_PSLLQI256: |
| 19174 | case IX86_BUILTIN_PSLLQI256_MASK: |
| 19175 | case IX86_BUILTIN_PSLLQI512: |
| 19176 | case IX86_BUILTIN_PSLLW: |
| 19177 | case IX86_BUILTIN_PSLLW128: |
| 19178 | case IX86_BUILTIN_PSLLW128_MASK: |
| 19179 | case IX86_BUILTIN_PSLLW256: |
| 19180 | case IX86_BUILTIN_PSLLW256_MASK: |
| 19181 | case IX86_BUILTIN_PSLLW512_MASK: |
| 19182 | case IX86_BUILTIN_PSLLWI: |
| 19183 | case IX86_BUILTIN_PSLLWI128: |
| 19184 | case IX86_BUILTIN_PSLLWI128_MASK: |
| 19185 | case IX86_BUILTIN_PSLLWI256: |
| 19186 | case IX86_BUILTIN_PSLLWI256_MASK: |
| 19187 | case IX86_BUILTIN_PSLLWI512_MASK: |
| 19188 | rcode = ASHIFT; |
| 19189 | is_vshift = false; |
| 19190 | goto do_shift; |
| 19191 | case IX86_BUILTIN_PSRAD: |
| 19192 | case IX86_BUILTIN_PSRAD128: |
| 19193 | case IX86_BUILTIN_PSRAD128_MASK: |
| 19194 | case IX86_BUILTIN_PSRAD256: |
| 19195 | case IX86_BUILTIN_PSRAD256_MASK: |
| 19196 | case IX86_BUILTIN_PSRAD512: |
| 19197 | case IX86_BUILTIN_PSRADI: |
| 19198 | case IX86_BUILTIN_PSRADI128: |
| 19199 | case IX86_BUILTIN_PSRADI128_MASK: |
| 19200 | case IX86_BUILTIN_PSRADI256: |
| 19201 | case IX86_BUILTIN_PSRADI256_MASK: |
| 19202 | case IX86_BUILTIN_PSRADI512: |
| 19203 | case IX86_BUILTIN_PSRAQ128_MASK: |
| 19204 | case IX86_BUILTIN_PSRAQ256_MASK: |
| 19205 | case IX86_BUILTIN_PSRAQ512: |
| 19206 | case IX86_BUILTIN_PSRAQI128_MASK: |
| 19207 | case IX86_BUILTIN_PSRAQI256_MASK: |
| 19208 | case IX86_BUILTIN_PSRAQI512: |
| 19209 | case IX86_BUILTIN_PSRAW: |
| 19210 | case IX86_BUILTIN_PSRAW128: |
| 19211 | case IX86_BUILTIN_PSRAW128_MASK: |
| 19212 | case IX86_BUILTIN_PSRAW256: |
| 19213 | case IX86_BUILTIN_PSRAW256_MASK: |
| 19214 | case IX86_BUILTIN_PSRAW512: |
| 19215 | case IX86_BUILTIN_PSRAWI: |
| 19216 | case IX86_BUILTIN_PSRAWI128: |
| 19217 | case IX86_BUILTIN_PSRAWI128_MASK: |
| 19218 | case IX86_BUILTIN_PSRAWI256: |
| 19219 | case IX86_BUILTIN_PSRAWI256_MASK: |
| 19220 | case IX86_BUILTIN_PSRAWI512: |
| 19221 | rcode = ASHIFTRT; |
| 19222 | is_vshift = false; |
| 19223 | goto do_shift; |
| 19224 | case IX86_BUILTIN_PSRLD: |
| 19225 | case IX86_BUILTIN_PSRLD128: |
| 19226 | case IX86_BUILTIN_PSRLD128_MASK: |
| 19227 | case IX86_BUILTIN_PSRLD256: |
| 19228 | case IX86_BUILTIN_PSRLD256_MASK: |
| 19229 | case IX86_BUILTIN_PSRLD512: |
| 19230 | case IX86_BUILTIN_PSRLDI: |
| 19231 | case IX86_BUILTIN_PSRLDI128: |
| 19232 | case IX86_BUILTIN_PSRLDI128_MASK: |
| 19233 | case IX86_BUILTIN_PSRLDI256: |
| 19234 | case IX86_BUILTIN_PSRLDI256_MASK: |
| 19235 | case IX86_BUILTIN_PSRLDI512: |
| 19236 | case IX86_BUILTIN_PSRLQ: |
| 19237 | case IX86_BUILTIN_PSRLQ128: |
| 19238 | case IX86_BUILTIN_PSRLQ128_MASK: |
| 19239 | case IX86_BUILTIN_PSRLQ256: |
| 19240 | case IX86_BUILTIN_PSRLQ256_MASK: |
| 19241 | case IX86_BUILTIN_PSRLQ512: |
| 19242 | case IX86_BUILTIN_PSRLQI: |
| 19243 | case IX86_BUILTIN_PSRLQI128: |
| 19244 | case IX86_BUILTIN_PSRLQI128_MASK: |
| 19245 | case IX86_BUILTIN_PSRLQI256: |
| 19246 | case IX86_BUILTIN_PSRLQI256_MASK: |
| 19247 | case IX86_BUILTIN_PSRLQI512: |
| 19248 | case IX86_BUILTIN_PSRLW: |
| 19249 | case IX86_BUILTIN_PSRLW128: |
| 19250 | case IX86_BUILTIN_PSRLW128_MASK: |
| 19251 | case IX86_BUILTIN_PSRLW256: |
| 19252 | case IX86_BUILTIN_PSRLW256_MASK: |
| 19253 | case IX86_BUILTIN_PSRLW512: |
| 19254 | case IX86_BUILTIN_PSRLWI: |
| 19255 | case IX86_BUILTIN_PSRLWI128: |
| 19256 | case IX86_BUILTIN_PSRLWI128_MASK: |
| 19257 | case IX86_BUILTIN_PSRLWI256: |
| 19258 | case IX86_BUILTIN_PSRLWI256_MASK: |
| 19259 | case IX86_BUILTIN_PSRLWI512: |
| 19260 | rcode = LSHIFTRT; |
| 19261 | is_vshift = false; |
| 19262 | goto do_shift; |
| 19263 | case IX86_BUILTIN_PSLLVV16HI: |
| 19264 | case IX86_BUILTIN_PSLLVV16SI: |
| 19265 | case IX86_BUILTIN_PSLLVV2DI: |
| 19266 | case IX86_BUILTIN_PSLLVV2DI_MASK: |
| 19267 | case IX86_BUILTIN_PSLLVV32HI: |
| 19268 | case IX86_BUILTIN_PSLLVV4DI: |
| 19269 | case IX86_BUILTIN_PSLLVV4DI_MASK: |
| 19270 | case IX86_BUILTIN_PSLLVV4SI: |
| 19271 | case IX86_BUILTIN_PSLLVV4SI_MASK: |
| 19272 | case IX86_BUILTIN_PSLLVV8DI: |
| 19273 | case IX86_BUILTIN_PSLLVV8HI: |
| 19274 | case IX86_BUILTIN_PSLLVV8SI: |
| 19275 | case IX86_BUILTIN_PSLLVV8SI_MASK: |
| 19276 | rcode = ASHIFT; |
| 19277 | is_vshift = true; |
| 19278 | goto do_shift; |
| 19279 | case IX86_BUILTIN_PSRAVQ128: |
| 19280 | case IX86_BUILTIN_PSRAVQ256: |
| 19281 | case IX86_BUILTIN_PSRAVV16HI: |
| 19282 | case IX86_BUILTIN_PSRAVV16SI: |
| 19283 | case IX86_BUILTIN_PSRAVV32HI: |
| 19284 | case IX86_BUILTIN_PSRAVV4SI: |
| 19285 | case IX86_BUILTIN_PSRAVV4SI_MASK: |
| 19286 | case IX86_BUILTIN_PSRAVV8DI: |
| 19287 | case IX86_BUILTIN_PSRAVV8HI: |
| 19288 | case IX86_BUILTIN_PSRAVV8SI: |
| 19289 | case IX86_BUILTIN_PSRAVV8SI_MASK: |
| 19290 | rcode = ASHIFTRT; |
| 19291 | is_vshift = true; |
| 19292 | goto do_shift; |
| 19293 | case IX86_BUILTIN_PSRLVV16HI: |
| 19294 | case IX86_BUILTIN_PSRLVV16SI: |
| 19295 | case IX86_BUILTIN_PSRLVV2DI: |
| 19296 | case IX86_BUILTIN_PSRLVV2DI_MASK: |
| 19297 | case IX86_BUILTIN_PSRLVV32HI: |
| 19298 | case IX86_BUILTIN_PSRLVV4DI: |
| 19299 | case IX86_BUILTIN_PSRLVV4DI_MASK: |
| 19300 | case IX86_BUILTIN_PSRLVV4SI: |
| 19301 | case IX86_BUILTIN_PSRLVV4SI_MASK: |
| 19302 | case IX86_BUILTIN_PSRLVV8DI: |
| 19303 | case IX86_BUILTIN_PSRLVV8HI: |
| 19304 | case IX86_BUILTIN_PSRLVV8SI: |
| 19305 | case IX86_BUILTIN_PSRLVV8SI_MASK: |
| 19306 | rcode = LSHIFTRT; |
| 19307 | is_vshift = true; |
| 19308 | goto do_shift; |
| 19309 | |
| 19310 | do_shift: |
| 19311 | gcc_assert (n_args >= 2); |
| 19312 | if (TREE_CODE (args[0]) != VECTOR_CST) |
| 19313 | break; |
| 19314 | mask = HOST_WIDE_INT_M1U; |
| 19315 | if (n_args > 2) |
| 19316 | { |
| 19317 | /* This is masked shift. */ |
| 19318 | if (!tree_fits_uhwi_p (args[n_args - 1]) |
| 19319 | || TREE_SIDE_EFFECTS (args[n_args - 2])) |
| 19320 | break; |
| 19321 | mask = tree_to_uhwi (args[n_args - 1]); |
| 19322 | unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); |
| 19323 | mask |= HOST_WIDE_INT_M1U << elems; |
| 19324 | if (mask != HOST_WIDE_INT_M1U |
| 19325 | && TREE_CODE (args[n_args - 2]) != VECTOR_CST) |
| 19326 | break; |
| 19327 | if (mask == (HOST_WIDE_INT_M1U << elems)) |
| 19328 | return args[n_args - 2]; |
| 19329 | } |
| 19330 | if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST) |
| 19331 | break; |
| 19332 | if (tree tem = (is_vshift ? integer_one_node |
| 19333 | : ix86_vector_shift_count (arg1: args[1]))) |
| 19334 | { |
| 19335 | unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); |
| 19336 | unsigned HOST_WIDE_INT prec |
| 19337 | = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))); |
| 19338 | if (count == 0 && mask == HOST_WIDE_INT_M1U) |
| 19339 | return args[0]; |
| 19340 | if (count >= prec) |
| 19341 | { |
| 19342 | if (rcode == ASHIFTRT) |
| 19343 | count = prec - 1; |
| 19344 | else if (mask == HOST_WIDE_INT_M1U) |
| 19345 | return build_zero_cst (TREE_TYPE (args[0])); |
| 19346 | } |
| 19347 | tree countt = NULL_TREE; |
| 19348 | if (!is_vshift) |
| 19349 | { |
| 19350 | if (count >= prec) |
| 19351 | countt = integer_zero_node; |
| 19352 | else |
| 19353 | countt = build_int_cst (integer_type_node, count); |
| 19354 | } |
| 19355 | tree_vector_builder builder; |
| 19356 | if (mask != HOST_WIDE_INT_M1U || is_vshift) |
| 19357 | builder.new_vector (TREE_TYPE (args[0]), |
| 19358 | npatterns: TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])), |
| 19359 | nelts_per_pattern: 1); |
| 19360 | else |
| 19361 | builder.new_unary_operation (TREE_TYPE (args[0]), vec: args[0], |
| 19362 | allow_stepped_p: false); |
| 19363 | unsigned int cnt = builder.encoded_nelts (); |
| 19364 | for (unsigned int i = 0; i < cnt; ++i) |
| 19365 | { |
| 19366 | tree elt = VECTOR_CST_ELT (args[0], i); |
| 19367 | if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt)) |
| 19368 | return NULL_TREE; |
| 19369 | tree type = TREE_TYPE (elt); |
| 19370 | if (rcode == LSHIFTRT) |
| 19371 | elt = fold_convert (unsigned_type_for (type), elt); |
| 19372 | if (is_vshift) |
| 19373 | { |
| 19374 | countt = VECTOR_CST_ELT (args[1], i); |
| 19375 | if (TREE_CODE (countt) != INTEGER_CST |
| 19376 | || TREE_OVERFLOW (countt)) |
| 19377 | return NULL_TREE; |
| 19378 | if (wi::neg_p (x: wi::to_wide (t: countt)) |
| 19379 | || wi::to_widest (t: countt) >= prec) |
| 19380 | { |
| 19381 | if (rcode == ASHIFTRT) |
| 19382 | countt = build_int_cst (TREE_TYPE (countt), |
| 19383 | prec - 1); |
| 19384 | else |
| 19385 | { |
| 19386 | elt = build_zero_cst (TREE_TYPE (elt)); |
| 19387 | countt = build_zero_cst (TREE_TYPE (countt)); |
| 19388 | } |
| 19389 | } |
| 19390 | } |
| 19391 | else if (count >= prec) |
| 19392 | elt = build_zero_cst (TREE_TYPE (elt)); |
| 19393 | elt = const_binop (rcode == ASHIFT |
| 19394 | ? LSHIFT_EXPR : RSHIFT_EXPR, |
| 19395 | TREE_TYPE (elt), elt, countt); |
| 19396 | if (!elt || TREE_CODE (elt) != INTEGER_CST) |
| 19397 | return NULL_TREE; |
| 19398 | if (rcode == LSHIFTRT) |
| 19399 | elt = fold_convert (type, elt); |
| 19400 | if ((mask & (HOST_WIDE_INT_1U << i)) == 0) |
| 19401 | { |
| 19402 | elt = VECTOR_CST_ELT (args[n_args - 2], i); |
| 19403 | if (TREE_CODE (elt) != INTEGER_CST |
| 19404 | || TREE_OVERFLOW (elt)) |
| 19405 | return NULL_TREE; |
| 19406 | } |
| 19407 | builder.quick_push (obj: elt); |
| 19408 | } |
| 19409 | return builder.build (); |
| 19410 | } |
| 19411 | break; |
| 19412 | |
| 19413 | case IX86_BUILTIN_MINSS: |
| 19414 | case IX86_BUILTIN_MINSH_MASK: |
| 19415 | tcode = LT_EXPR; |
| 19416 | is_scalar = true; |
| 19417 | goto do_minmax; |
| 19418 | |
| 19419 | case IX86_BUILTIN_MAXSS: |
| 19420 | case IX86_BUILTIN_MAXSH_MASK: |
| 19421 | tcode = GT_EXPR; |
| 19422 | is_scalar = true; |
| 19423 | goto do_minmax; |
| 19424 | |
| 19425 | case IX86_BUILTIN_MINPS: |
| 19426 | case IX86_BUILTIN_MINPD: |
| 19427 | case IX86_BUILTIN_MINPS256: |
| 19428 | case IX86_BUILTIN_MINPD256: |
| 19429 | case IX86_BUILTIN_MINPS512: |
| 19430 | case IX86_BUILTIN_MINPD512: |
| 19431 | case IX86_BUILTIN_MINPS128_MASK: |
| 19432 | case IX86_BUILTIN_MINPD128_MASK: |
| 19433 | case IX86_BUILTIN_MINPS256_MASK: |
| 19434 | case IX86_BUILTIN_MINPD256_MASK: |
| 19435 | case IX86_BUILTIN_MINPH128_MASK: |
| 19436 | case IX86_BUILTIN_MINPH256_MASK: |
| 19437 | case IX86_BUILTIN_MINPH512_MASK: |
| 19438 | tcode = LT_EXPR; |
| 19439 | is_scalar = false; |
| 19440 | goto do_minmax; |
| 19441 | |
| 19442 | case IX86_BUILTIN_MAXPS: |
| 19443 | case IX86_BUILTIN_MAXPD: |
| 19444 | case IX86_BUILTIN_MAXPS256: |
| 19445 | case IX86_BUILTIN_MAXPD256: |
| 19446 | case IX86_BUILTIN_MAXPS512: |
| 19447 | case IX86_BUILTIN_MAXPD512: |
| 19448 | case IX86_BUILTIN_MAXPS128_MASK: |
| 19449 | case IX86_BUILTIN_MAXPD128_MASK: |
| 19450 | case IX86_BUILTIN_MAXPS256_MASK: |
| 19451 | case IX86_BUILTIN_MAXPD256_MASK: |
| 19452 | case IX86_BUILTIN_MAXPH128_MASK: |
| 19453 | case IX86_BUILTIN_MAXPH256_MASK: |
| 19454 | case IX86_BUILTIN_MAXPH512_MASK: |
| 19455 | tcode = GT_EXPR; |
| 19456 | is_scalar = false; |
| 19457 | do_minmax: |
| 19458 | gcc_assert (n_args >= 2); |
| 19459 | if (TREE_CODE (args[0]) != VECTOR_CST |
| 19460 | || TREE_CODE (args[1]) != VECTOR_CST) |
| 19461 | break; |
| 19462 | mask = HOST_WIDE_INT_M1U; |
| 19463 | if (n_args > 2) |
| 19464 | { |
| 19465 | gcc_assert (n_args >= 4); |
| 19466 | /* This is masked minmax. */ |
| 19467 | if (TREE_CODE (args[3]) != INTEGER_CST |
| 19468 | || TREE_SIDE_EFFECTS (args[2])) |
| 19469 | break; |
| 19470 | mask = TREE_INT_CST_LOW (args[3]); |
| 19471 | unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); |
| 19472 | mask |= HOST_WIDE_INT_M1U << elems; |
| 19473 | if (mask != HOST_WIDE_INT_M1U |
| 19474 | && TREE_CODE (args[2]) != VECTOR_CST) |
| 19475 | break; |
| 19476 | if (n_args >= 5) |
| 19477 | { |
| 19478 | if (!tree_fits_uhwi_p (args[4])) |
| 19479 | break; |
| 19480 | if (tree_to_uhwi (args[4]) != 4 |
| 19481 | && tree_to_uhwi (args[4]) != 8) |
| 19482 | break; |
| 19483 | } |
| 19484 | if (mask == (HOST_WIDE_INT_M1U << elems)) |
| 19485 | return args[2]; |
| 19486 | } |
| 19487 | /* Punt on NaNs, unless exceptions are disabled. */ |
| 19488 | if (HONOR_NANS (args[0]) |
| 19489 | && (n_args < 5 || tree_to_uhwi (args[4]) != 8)) |
| 19490 | for (int i = 0; i < 2; ++i) |
| 19491 | { |
| 19492 | unsigned count = vector_cst_encoded_nelts (t: args[i]); |
| 19493 | for (unsigned j = 0; j < count; ++j) |
| 19494 | if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j))) |
| 19495 | return NULL_TREE; |
| 19496 | } |
| 19497 | { |
| 19498 | tree res = const_binop (tcode, |
| 19499 | truth_type_for (TREE_TYPE (args[0])), |
| 19500 | args[0], args[1]); |
| 19501 | if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST) |
| 19502 | break; |
| 19503 | res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res, |
| 19504 | args[0], args[1]); |
| 19505 | if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST) |
| 19506 | break; |
| 19507 | if (mask != HOST_WIDE_INT_M1U) |
| 19508 | { |
| 19509 | unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); |
| 19510 | vec_perm_builder sel (nelts, nelts, 1); |
| 19511 | for (unsigned int i = 0; i < nelts; i++) |
| 19512 | if (mask & (HOST_WIDE_INT_1U << i)) |
| 19513 | sel.quick_push (obj: i); |
| 19514 | else |
| 19515 | sel.quick_push (obj: nelts + i); |
| 19516 | vec_perm_indices indices (sel, 2, nelts); |
| 19517 | res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2], |
| 19518 | indices); |
| 19519 | if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST) |
| 19520 | break; |
| 19521 | } |
| 19522 | if (is_scalar) |
| 19523 | { |
| 19524 | unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); |
| 19525 | vec_perm_builder sel (nelts, nelts, 1); |
| 19526 | sel.quick_push (obj: 0); |
| 19527 | for (unsigned int i = 1; i < nelts; i++) |
| 19528 | sel.quick_push (obj: nelts + i); |
| 19529 | vec_perm_indices indices (sel, 2, nelts); |
| 19530 | res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0], |
| 19531 | indices); |
| 19532 | if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST) |
| 19533 | break; |
| 19534 | } |
| 19535 | return res; |
| 19536 | } |
| 19537 | |
| 19538 | default: |
| 19539 | break; |
| 19540 | } |
| 19541 | } |
| 19542 | |
| 19543 | #ifdef SUBTARGET_FOLD_BUILTIN |
| 19544 | return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); |
| 19545 | #endif |
| 19546 | |
| 19547 | return NULL_TREE; |
| 19548 | } |
| 19549 | |
/* Fold an MD builtin in GIMPLE (use ix86_fold_builtin for folding
   into a constant).  */
| 19552 | |
| 19553 | bool |
| 19554 | ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) |
| 19555 | { |
| 19556 | gimple *stmt = gsi_stmt (i: *gsi), *g; |
| 19557 | gimple_seq stmts = NULL; |
| 19558 | tree fndecl = gimple_call_fndecl (gs: stmt); |
| 19559 | gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); |
| 19560 | int n_args = gimple_call_num_args (gs: stmt); |
| 19561 | enum ix86_builtins fn_code |
| 19562 | = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl); |
| 19563 | tree decl = NULL_TREE; |
| 19564 | tree arg0, arg1, arg2; |
| 19565 | enum rtx_code rcode; |
| 19566 | enum tree_code tcode; |
| 19567 | unsigned HOST_WIDE_INT count; |
| 19568 | bool is_vshift; |
| 19569 | unsigned HOST_WIDE_INT elems; |
| 19570 | location_t loc; |
| 19571 | |
| 19572 | /* Don't fold when there's isa mismatch. */ |
| 19573 | if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL)) |
| 19574 | return false; |
| 19575 | |
| 19576 | switch (fn_code) |
| 19577 | { |
| 19578 | case IX86_BUILTIN_TZCNT32: |
| 19579 | decl = builtin_decl_implicit (fncode: BUILT_IN_CTZ); |
| 19580 | goto fold_tzcnt_lzcnt; |
| 19581 | |
| 19582 | case IX86_BUILTIN_TZCNT64: |
| 19583 | decl = builtin_decl_implicit (fncode: BUILT_IN_CTZLL); |
| 19584 | goto fold_tzcnt_lzcnt; |
| 19585 | |
| 19586 | case IX86_BUILTIN_LZCNT32: |
| 19587 | decl = builtin_decl_implicit (fncode: BUILT_IN_CLZ); |
| 19588 | goto fold_tzcnt_lzcnt; |
| 19589 | |
| 19590 | case IX86_BUILTIN_LZCNT64: |
| 19591 | decl = builtin_decl_implicit (fncode: BUILT_IN_CLZLL); |
| 19592 | goto fold_tzcnt_lzcnt; |
| 19593 | |
| 19594 | fold_tzcnt_lzcnt: |
| 19595 | gcc_assert (n_args == 1); |
| 19596 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 19597 | if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (gs: stmt)) |
| 19598 | { |
| 19599 | int prec = TYPE_PRECISION (TREE_TYPE (arg0)); |
| 19600 | /* If arg0 is provably non-zero, optimize into generic |
| 19601 | __builtin_c[tl]z{,ll} function the middle-end handles |
| 19602 | better. */ |
| 19603 | if (!expr_not_equal_to (t: arg0, wi::zero (precision: prec))) |
| 19604 | return false; |
| 19605 | |
| 19606 | loc = gimple_location (g: stmt); |
| 19607 | g = gimple_build_call (decl, 1, arg0); |
| 19608 | gimple_set_location (g, location: loc); |
| 19609 | tree lhs = make_ssa_name (integer_type_node); |
| 19610 | gimple_call_set_lhs (gs: g, lhs); |
| 19611 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 19612 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), NOP_EXPR, lhs); |
| 19613 | gimple_set_location (g, location: loc); |
| 19614 | gsi_replace (gsi, g, false); |
| 19615 | return true; |
| 19616 | } |
| 19617 | break; |
| 19618 | |
| 19619 | case IX86_BUILTIN_BZHI32: |
| 19620 | case IX86_BUILTIN_BZHI64: |
| 19621 | gcc_assert (n_args == 2); |
| 19622 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 19623 | if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (gs: stmt)) |
| 19624 | { |
| 19625 | unsigned int idx = tree_to_uhwi (arg1) & 0xff; |
| 19626 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 19627 | if (idx < TYPE_PRECISION (TREE_TYPE (arg0))) |
| 19628 | break; |
| 19629 | loc = gimple_location (g: stmt); |
| 19630 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
| 19631 | gimple_set_location (g, location: loc); |
| 19632 | gsi_replace (gsi, g, false); |
| 19633 | return true; |
| 19634 | } |
| 19635 | break; |
| 19636 | |
| 19637 | case IX86_BUILTIN_PDEP32: |
| 19638 | case IX86_BUILTIN_PDEP64: |
| 19639 | case IX86_BUILTIN_PEXT32: |
| 19640 | case IX86_BUILTIN_PEXT64: |
| 19641 | gcc_assert (n_args == 2); |
| 19642 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 19643 | if (integer_all_onesp (arg1) && gimple_call_lhs (gs: stmt)) |
| 19644 | { |
| 19645 | loc = gimple_location (g: stmt); |
| 19646 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 19647 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
| 19648 | gimple_set_location (g, location: loc); |
| 19649 | gsi_replace (gsi, g, false); |
| 19650 | return true; |
| 19651 | } |
| 19652 | break; |
| 19653 | |
| 19654 | case IX86_BUILTIN_PBLENDVB256: |
| 19655 | case IX86_BUILTIN_BLENDVPS256: |
| 19656 | case IX86_BUILTIN_BLENDVPD256: |
| 19657 | /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower |
| 19658 | to scalar operations and not combined back. */ |
| 19659 | if (!TARGET_AVX2) |
| 19660 | break; |
| 19661 | |
| 19662 | /* FALLTHRU. */ |
| 19663 | case IX86_BUILTIN_BLENDVPD: |
| 19664 | /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2, |
| 19665 | w/o sse4.2, it's veclowered to scalar operations and |
| 19666 | not combined back. */ |
| 19667 | if (!TARGET_SSE4_2) |
| 19668 | break; |
| 19669 | /* FALLTHRU. */ |
| 19670 | case IX86_BUILTIN_PBLENDVB128: |
| 19671 | case IX86_BUILTIN_BLENDVPS: |
| 19672 | gcc_assert (n_args == 3); |
| 19673 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 19674 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 19675 | arg2 = gimple_call_arg (gs: stmt, index: 2); |
| 19676 | if (gimple_call_lhs (gs: stmt)) |
| 19677 | { |
| 19678 | loc = gimple_location (g: stmt); |
| 19679 | tree type = TREE_TYPE (arg2); |
| 19680 | if (VECTOR_FLOAT_TYPE_P (type)) |
| 19681 | { |
| 19682 | tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode |
| 19683 | ? intSI_type_node : intDI_type_node; |
| 19684 | type = get_same_sized_vectype (itype, type); |
| 19685 | } |
| 19686 | else |
| 19687 | type = signed_type_for (type); |
| 19688 | arg2 = gimple_build (seq: &stmts, code: VIEW_CONVERT_EXPR, type, ops: arg2); |
| 19689 | tree zero_vec = build_zero_cst (type); |
| 19690 | tree cmp_type = truth_type_for (type); |
| 19691 | tree cmp = gimple_build (seq: &stmts, code: LT_EXPR, type: cmp_type, ops: arg2, ops: zero_vec); |
| 19692 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
| 19693 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
| 19694 | VEC_COND_EXPR, cmp, |
| 19695 | arg1, arg0); |
| 19696 | gimple_set_location (g, location: loc); |
| 19697 | gsi_replace (gsi, g, false); |
| 19698 | } |
| 19699 | else |
| 19700 | gsi_replace (gsi, gimple_build_nop (), false); |
| 19701 | return true; |
| 19702 | |
| 19703 | |
| 19704 | case IX86_BUILTIN_PCMPEQB128: |
| 19705 | case IX86_BUILTIN_PCMPEQW128: |
| 19706 | case IX86_BUILTIN_PCMPEQD128: |
| 19707 | case IX86_BUILTIN_PCMPEQQ: |
| 19708 | case IX86_BUILTIN_PCMPEQB256: |
| 19709 | case IX86_BUILTIN_PCMPEQW256: |
| 19710 | case IX86_BUILTIN_PCMPEQD256: |
| 19711 | case IX86_BUILTIN_PCMPEQQ256: |
| 19712 | tcode = EQ_EXPR; |
| 19713 | goto do_cmp; |
| 19714 | |
| 19715 | case IX86_BUILTIN_PCMPGTB128: |
| 19716 | case IX86_BUILTIN_PCMPGTW128: |
| 19717 | case IX86_BUILTIN_PCMPGTD128: |
| 19718 | case IX86_BUILTIN_PCMPGTQ: |
| 19719 | case IX86_BUILTIN_PCMPGTB256: |
| 19720 | case IX86_BUILTIN_PCMPGTW256: |
| 19721 | case IX86_BUILTIN_PCMPGTD256: |
| 19722 | case IX86_BUILTIN_PCMPGTQ256: |
| 19723 | tcode = GT_EXPR; |
| 19724 | |
| 19725 | do_cmp: |
| 19726 | gcc_assert (n_args == 2); |
| 19727 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 19728 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 19729 | if (gimple_call_lhs (gs: stmt)) |
| 19730 | { |
| 19731 | loc = gimple_location (g: stmt); |
| 19732 | tree type = TREE_TYPE (arg0); |
| 19733 | tree zero_vec = build_zero_cst (type); |
| 19734 | tree minus_one_vec = build_minus_one_cst (type); |
| 19735 | tree cmp_type = truth_type_for (type); |
| 19736 | tree cmp = gimple_build (seq: &stmts, code: tcode, type: cmp_type, ops: arg0, ops: arg1); |
| 19737 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
| 19738 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
| 19739 | VEC_COND_EXPR, cmp, |
| 19740 | minus_one_vec, zero_vec); |
| 19741 | gimple_set_location (g, location: loc); |
| 19742 | gsi_replace (gsi, g, false); |
| 19743 | } |
| 19744 | else |
| 19745 | gsi_replace (gsi, gimple_build_nop (), false); |
| 19746 | return true; |
| 19747 | |
| 19748 | case IX86_BUILTIN_PSLLD: |
| 19749 | case IX86_BUILTIN_PSLLD128: |
| 19750 | case IX86_BUILTIN_PSLLD128_MASK: |
| 19751 | case IX86_BUILTIN_PSLLD256: |
| 19752 | case IX86_BUILTIN_PSLLD256_MASK: |
| 19753 | case IX86_BUILTIN_PSLLD512: |
| 19754 | case IX86_BUILTIN_PSLLDI: |
| 19755 | case IX86_BUILTIN_PSLLDI128: |
| 19756 | case IX86_BUILTIN_PSLLDI128_MASK: |
| 19757 | case IX86_BUILTIN_PSLLDI256: |
| 19758 | case IX86_BUILTIN_PSLLDI256_MASK: |
| 19759 | case IX86_BUILTIN_PSLLDI512: |
| 19760 | case IX86_BUILTIN_PSLLQ: |
| 19761 | case IX86_BUILTIN_PSLLQ128: |
| 19762 | case IX86_BUILTIN_PSLLQ128_MASK: |
| 19763 | case IX86_BUILTIN_PSLLQ256: |
| 19764 | case IX86_BUILTIN_PSLLQ256_MASK: |
| 19765 | case IX86_BUILTIN_PSLLQ512: |
| 19766 | case IX86_BUILTIN_PSLLQI: |
| 19767 | case IX86_BUILTIN_PSLLQI128: |
| 19768 | case IX86_BUILTIN_PSLLQI128_MASK: |
| 19769 | case IX86_BUILTIN_PSLLQI256: |
| 19770 | case IX86_BUILTIN_PSLLQI256_MASK: |
| 19771 | case IX86_BUILTIN_PSLLQI512: |
| 19772 | case IX86_BUILTIN_PSLLW: |
| 19773 | case IX86_BUILTIN_PSLLW128: |
| 19774 | case IX86_BUILTIN_PSLLW128_MASK: |
| 19775 | case IX86_BUILTIN_PSLLW256: |
| 19776 | case IX86_BUILTIN_PSLLW256_MASK: |
| 19777 | case IX86_BUILTIN_PSLLW512_MASK: |
| 19778 | case IX86_BUILTIN_PSLLWI: |
| 19779 | case IX86_BUILTIN_PSLLWI128: |
| 19780 | case IX86_BUILTIN_PSLLWI128_MASK: |
| 19781 | case IX86_BUILTIN_PSLLWI256: |
| 19782 | case IX86_BUILTIN_PSLLWI256_MASK: |
| 19783 | case IX86_BUILTIN_PSLLWI512_MASK: |
| 19784 | rcode = ASHIFT; |
| 19785 | is_vshift = false; |
| 19786 | goto do_shift; |
| 19787 | case IX86_BUILTIN_PSRAD: |
| 19788 | case IX86_BUILTIN_PSRAD128: |
| 19789 | case IX86_BUILTIN_PSRAD128_MASK: |
| 19790 | case IX86_BUILTIN_PSRAD256: |
| 19791 | case IX86_BUILTIN_PSRAD256_MASK: |
| 19792 | case IX86_BUILTIN_PSRAD512: |
| 19793 | case IX86_BUILTIN_PSRADI: |
| 19794 | case IX86_BUILTIN_PSRADI128: |
| 19795 | case IX86_BUILTIN_PSRADI128_MASK: |
| 19796 | case IX86_BUILTIN_PSRADI256: |
| 19797 | case IX86_BUILTIN_PSRADI256_MASK: |
| 19798 | case IX86_BUILTIN_PSRADI512: |
| 19799 | case IX86_BUILTIN_PSRAQ128_MASK: |
| 19800 | case IX86_BUILTIN_PSRAQ256_MASK: |
| 19801 | case IX86_BUILTIN_PSRAQ512: |
| 19802 | case IX86_BUILTIN_PSRAQI128_MASK: |
| 19803 | case IX86_BUILTIN_PSRAQI256_MASK: |
| 19804 | case IX86_BUILTIN_PSRAQI512: |
| 19805 | case IX86_BUILTIN_PSRAW: |
| 19806 | case IX86_BUILTIN_PSRAW128: |
| 19807 | case IX86_BUILTIN_PSRAW128_MASK: |
| 19808 | case IX86_BUILTIN_PSRAW256: |
| 19809 | case IX86_BUILTIN_PSRAW256_MASK: |
| 19810 | case IX86_BUILTIN_PSRAW512: |
| 19811 | case IX86_BUILTIN_PSRAWI: |
| 19812 | case IX86_BUILTIN_PSRAWI128: |
| 19813 | case IX86_BUILTIN_PSRAWI128_MASK: |
| 19814 | case IX86_BUILTIN_PSRAWI256: |
| 19815 | case IX86_BUILTIN_PSRAWI256_MASK: |
| 19816 | case IX86_BUILTIN_PSRAWI512: |
| 19817 | rcode = ASHIFTRT; |
| 19818 | is_vshift = false; |
| 19819 | goto do_shift; |
| 19820 | case IX86_BUILTIN_PSRLD: |
| 19821 | case IX86_BUILTIN_PSRLD128: |
| 19822 | case IX86_BUILTIN_PSRLD128_MASK: |
| 19823 | case IX86_BUILTIN_PSRLD256: |
| 19824 | case IX86_BUILTIN_PSRLD256_MASK: |
| 19825 | case IX86_BUILTIN_PSRLD512: |
| 19826 | case IX86_BUILTIN_PSRLDI: |
| 19827 | case IX86_BUILTIN_PSRLDI128: |
| 19828 | case IX86_BUILTIN_PSRLDI128_MASK: |
| 19829 | case IX86_BUILTIN_PSRLDI256: |
| 19830 | case IX86_BUILTIN_PSRLDI256_MASK: |
| 19831 | case IX86_BUILTIN_PSRLDI512: |
| 19832 | case IX86_BUILTIN_PSRLQ: |
| 19833 | case IX86_BUILTIN_PSRLQ128: |
| 19834 | case IX86_BUILTIN_PSRLQ128_MASK: |
| 19835 | case IX86_BUILTIN_PSRLQ256: |
| 19836 | case IX86_BUILTIN_PSRLQ256_MASK: |
| 19837 | case IX86_BUILTIN_PSRLQ512: |
| 19838 | case IX86_BUILTIN_PSRLQI: |
| 19839 | case IX86_BUILTIN_PSRLQI128: |
| 19840 | case IX86_BUILTIN_PSRLQI128_MASK: |
| 19841 | case IX86_BUILTIN_PSRLQI256: |
| 19842 | case IX86_BUILTIN_PSRLQI256_MASK: |
| 19843 | case IX86_BUILTIN_PSRLQI512: |
| 19844 | case IX86_BUILTIN_PSRLW: |
| 19845 | case IX86_BUILTIN_PSRLW128: |
| 19846 | case IX86_BUILTIN_PSRLW128_MASK: |
| 19847 | case IX86_BUILTIN_PSRLW256: |
| 19848 | case IX86_BUILTIN_PSRLW256_MASK: |
| 19849 | case IX86_BUILTIN_PSRLW512: |
| 19850 | case IX86_BUILTIN_PSRLWI: |
| 19851 | case IX86_BUILTIN_PSRLWI128: |
| 19852 | case IX86_BUILTIN_PSRLWI128_MASK: |
| 19853 | case IX86_BUILTIN_PSRLWI256: |
| 19854 | case IX86_BUILTIN_PSRLWI256_MASK: |
| 19855 | case IX86_BUILTIN_PSRLWI512: |
| 19856 | rcode = LSHIFTRT; |
| 19857 | is_vshift = false; |
| 19858 | goto do_shift; |
| 19859 | case IX86_BUILTIN_PSLLVV16HI: |
| 19860 | case IX86_BUILTIN_PSLLVV16SI: |
| 19861 | case IX86_BUILTIN_PSLLVV2DI: |
| 19862 | case IX86_BUILTIN_PSLLVV2DI_MASK: |
| 19863 | case IX86_BUILTIN_PSLLVV32HI: |
| 19864 | case IX86_BUILTIN_PSLLVV4DI: |
| 19865 | case IX86_BUILTIN_PSLLVV4DI_MASK: |
| 19866 | case IX86_BUILTIN_PSLLVV4SI: |
| 19867 | case IX86_BUILTIN_PSLLVV4SI_MASK: |
| 19868 | case IX86_BUILTIN_PSLLVV8DI: |
| 19869 | case IX86_BUILTIN_PSLLVV8HI: |
| 19870 | case IX86_BUILTIN_PSLLVV8SI: |
| 19871 | case IX86_BUILTIN_PSLLVV8SI_MASK: |
| 19872 | rcode = ASHIFT; |
| 19873 | is_vshift = true; |
| 19874 | goto do_shift; |
| 19875 | case IX86_BUILTIN_PSRAVQ128: |
| 19876 | case IX86_BUILTIN_PSRAVQ256: |
| 19877 | case IX86_BUILTIN_PSRAVV16HI: |
| 19878 | case IX86_BUILTIN_PSRAVV16SI: |
| 19879 | case IX86_BUILTIN_PSRAVV32HI: |
| 19880 | case IX86_BUILTIN_PSRAVV4SI: |
| 19881 | case IX86_BUILTIN_PSRAVV4SI_MASK: |
| 19882 | case IX86_BUILTIN_PSRAVV8DI: |
| 19883 | case IX86_BUILTIN_PSRAVV8HI: |
| 19884 | case IX86_BUILTIN_PSRAVV8SI: |
| 19885 | case IX86_BUILTIN_PSRAVV8SI_MASK: |
| 19886 | rcode = ASHIFTRT; |
| 19887 | is_vshift = true; |
| 19888 | goto do_shift; |
| 19889 | case IX86_BUILTIN_PSRLVV16HI: |
| 19890 | case IX86_BUILTIN_PSRLVV16SI: |
| 19891 | case IX86_BUILTIN_PSRLVV2DI: |
| 19892 | case IX86_BUILTIN_PSRLVV2DI_MASK: |
| 19893 | case IX86_BUILTIN_PSRLVV32HI: |
| 19894 | case IX86_BUILTIN_PSRLVV4DI: |
| 19895 | case IX86_BUILTIN_PSRLVV4DI_MASK: |
| 19896 | case IX86_BUILTIN_PSRLVV4SI: |
| 19897 | case IX86_BUILTIN_PSRLVV4SI_MASK: |
| 19898 | case IX86_BUILTIN_PSRLVV8DI: |
| 19899 | case IX86_BUILTIN_PSRLVV8HI: |
| 19900 | case IX86_BUILTIN_PSRLVV8SI: |
| 19901 | case IX86_BUILTIN_PSRLVV8SI_MASK: |
| 19902 | rcode = LSHIFTRT; |
| 19903 | is_vshift = true; |
| 19904 | goto do_shift; |
| 19905 | |
| 19906 | do_shift: |
| 19907 | gcc_assert (n_args >= 2); |
| 19908 | if (!gimple_call_lhs (gs: stmt)) |
| 19909 | { |
| 19910 | gsi_replace (gsi, gimple_build_nop (), false); |
| 19911 | return true; |
| 19912 | } |
| 19913 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 19914 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 19915 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
| 19916 | /* For masked shift, only optimize if the mask is all ones. */ |
| 19917 | if (n_args > 2 |
| 19918 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
| 19919 | break; |
| 19920 | if (is_vshift) |
| 19921 | { |
| 19922 | if (TREE_CODE (arg1) != VECTOR_CST) |
| 19923 | break; |
| 19924 | count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); |
| 19925 | if (integer_zerop (arg1)) |
| 19926 | count = 0; |
| 19927 | else if (rcode == ASHIFTRT) |
| 19928 | break; |
| 19929 | else |
| 19930 | for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) |
| 19931 | { |
| 19932 | tree elt = VECTOR_CST_ELT (arg1, i); |
| 19933 | if (!wi::neg_p (x: wi::to_wide (t: elt)) |
| 19934 | && wi::to_widest (t: elt) < count) |
| 19935 | return false; |
| 19936 | } |
| 19937 | } |
| 19938 | else |
| 19939 | { |
| 19940 | arg1 = ix86_vector_shift_count (arg1); |
| 19941 | if (!arg1) |
| 19942 | break; |
| 19943 | count = tree_to_uhwi (arg1); |
| 19944 | } |
| 19945 | if (count == 0) |
| 19946 | { |
| 19947 | /* Just return the first argument for shift by 0. */ |
| 19948 | loc = gimple_location (g: stmt); |
| 19949 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
| 19950 | gimple_set_location (g, location: loc); |
| 19951 | gsi_replace (gsi, g, false); |
| 19952 | return true; |
| 19953 | } |
| 19954 | if (rcode != ASHIFTRT |
| 19955 | && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)))) |
| 19956 | { |
| 19957 | /* For shift counts equal or greater than precision, except for |
| 19958 | arithmetic right shift the result is zero. */ |
| 19959 | loc = gimple_location (g: stmt); |
| 19960 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
| 19961 | build_zero_cst (TREE_TYPE (arg0))); |
| 19962 | gimple_set_location (g, location: loc); |
| 19963 | gsi_replace (gsi, g, false); |
| 19964 | return true; |
| 19965 | } |
| 19966 | break; |
| 19967 | |
| 19968 | case IX86_BUILTIN_SHUFPD512: |
| 19969 | case IX86_BUILTIN_SHUFPS512: |
| 19970 | case IX86_BUILTIN_SHUFPD: |
| 19971 | case IX86_BUILTIN_SHUFPD256: |
| 19972 | case IX86_BUILTIN_SHUFPS: |
| 19973 | case IX86_BUILTIN_SHUFPS256: |
| 19974 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 19975 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
| 19976 | /* This is masked shuffle. Only optimize if the mask is all ones. */ |
| 19977 | if (n_args > 3 |
| 19978 | && !ix86_masked_all_ones (elems, |
| 19979 | arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
| 19980 | break; |
| 19981 | arg2 = gimple_call_arg (gs: stmt, index: 2); |
| 19982 | if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (gs: stmt)) |
| 19983 | { |
| 19984 | unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2); |
| 19985 | /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */ |
| 19986 | if (shuffle_mask > 255) |
| 19987 | return false; |
| 19988 | |
| 19989 | machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))); |
| 19990 | loc = gimple_location (g: stmt); |
| 19991 | tree itype = (imode == E_DFmode |
| 19992 | ? long_long_integer_type_node : integer_type_node); |
| 19993 | tree vtype = build_vector_type (itype, elems); |
| 19994 | tree_vector_builder elts (vtype, elems, 1); |
| 19995 | |
| 19996 | |
| 19997 | /* Transform integer shuffle_mask to vector perm_mask which |
| 19998 | is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */ |
| 19999 | for (unsigned i = 0; i != elems; i++) |
| 20000 | { |
| 20001 | unsigned sel_idx; |
| 20002 | /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6]) |
| 20003 | provide 2 select constrols for each element of the |
| 20004 | destination. */ |
| 20005 | if (imode == E_DFmode) |
| 20006 | sel_idx = (i & 1) * elems + (i & ~1) |
| 20007 | + ((shuffle_mask >> i) & 1); |
| 20008 | else |
| 20009 | { |
| 20010 | /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select |
| 20011 | controls for each element of the destination. */ |
| 20012 | unsigned j = i % 4; |
| 20013 | sel_idx = ((i >> 1) & 1) * elems + (i & ~3) |
| 20014 | + ((shuffle_mask >> 2 * j) & 3); |
| 20015 | } |
| 20016 | elts.quick_push (obj: build_int_cst (itype, sel_idx)); |
| 20017 | } |
| 20018 | |
| 20019 | tree perm_mask = elts.build (); |
| 20020 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 20021 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
| 20022 | VEC_PERM_EXPR, |
| 20023 | arg0, arg1, perm_mask); |
| 20024 | gimple_set_location (g, location: loc); |
| 20025 | gsi_replace (gsi, g, false); |
| 20026 | return true; |
| 20027 | } |
| 20028 | // Do not error yet, the constant could be propagated later? |
| 20029 | break; |
| 20030 | |
| 20031 | case IX86_BUILTIN_PABSB: |
| 20032 | case IX86_BUILTIN_PABSW: |
| 20033 | case IX86_BUILTIN_PABSD: |
| 20034 | /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ |
| 20035 | if (!TARGET_MMX_WITH_SSE) |
| 20036 | break; |
| 20037 | /* FALLTHRU. */ |
| 20038 | case IX86_BUILTIN_PABSB128: |
| 20039 | case IX86_BUILTIN_PABSB256: |
| 20040 | case IX86_BUILTIN_PABSB512: |
| 20041 | case IX86_BUILTIN_PABSW128: |
| 20042 | case IX86_BUILTIN_PABSW256: |
| 20043 | case IX86_BUILTIN_PABSW512: |
| 20044 | case IX86_BUILTIN_PABSD128: |
| 20045 | case IX86_BUILTIN_PABSD256: |
| 20046 | case IX86_BUILTIN_PABSD512: |
| 20047 | case IX86_BUILTIN_PABSQ128: |
| 20048 | case IX86_BUILTIN_PABSQ256: |
| 20049 | case IX86_BUILTIN_PABSQ512: |
| 20050 | case IX86_BUILTIN_PABSB128_MASK: |
| 20051 | case IX86_BUILTIN_PABSB256_MASK: |
| 20052 | case IX86_BUILTIN_PABSW128_MASK: |
| 20053 | case IX86_BUILTIN_PABSW256_MASK: |
| 20054 | case IX86_BUILTIN_PABSD128_MASK: |
| 20055 | case IX86_BUILTIN_PABSD256_MASK: |
| 20056 | gcc_assert (n_args >= 1); |
| 20057 | if (!gimple_call_lhs (gs: stmt)) |
| 20058 | { |
| 20059 | gsi_replace (gsi, gimple_build_nop (), false); |
| 20060 | return true; |
| 20061 | } |
| 20062 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 20063 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
| 20064 | /* For masked ABS, only optimize if the mask is all ones. */ |
| 20065 | if (n_args > 1 |
| 20066 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
| 20067 | break; |
| 20068 | { |
| 20069 | tree utype, ures, vce; |
| 20070 | utype = unsigned_type_for (TREE_TYPE (arg0)); |
| 20071 | /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR |
| 20072 | instead of ABS_EXPR to handle overflow case(TYPE_MIN). */ |
| 20073 | ures = gimple_build (seq: &stmts, code: ABSU_EXPR, type: utype, ops: arg0); |
| 20074 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
| 20075 | loc = gimple_location (g: stmt); |
| 20076 | vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); |
| 20077 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
| 20078 | VIEW_CONVERT_EXPR, vce); |
| 20079 | gsi_replace (gsi, g, false); |
| 20080 | } |
| 20081 | return true; |
| 20082 | |
| 20083 | case IX86_BUILTIN_MINPS: |
| 20084 | case IX86_BUILTIN_MINPD: |
| 20085 | case IX86_BUILTIN_MINPS256: |
| 20086 | case IX86_BUILTIN_MINPD256: |
| 20087 | case IX86_BUILTIN_MINPS512: |
| 20088 | case IX86_BUILTIN_MINPD512: |
| 20089 | case IX86_BUILTIN_MINPS128_MASK: |
| 20090 | case IX86_BUILTIN_MINPD128_MASK: |
| 20091 | case IX86_BUILTIN_MINPS256_MASK: |
| 20092 | case IX86_BUILTIN_MINPD256_MASK: |
| 20093 | case IX86_BUILTIN_MINPH128_MASK: |
| 20094 | case IX86_BUILTIN_MINPH256_MASK: |
| 20095 | case IX86_BUILTIN_MINPH512_MASK: |
| 20096 | tcode = LT_EXPR; |
| 20097 | goto do_minmax; |
| 20098 | |
| 20099 | case IX86_BUILTIN_MAXPS: |
| 20100 | case IX86_BUILTIN_MAXPD: |
| 20101 | case IX86_BUILTIN_MAXPS256: |
| 20102 | case IX86_BUILTIN_MAXPD256: |
| 20103 | case IX86_BUILTIN_MAXPS512: |
| 20104 | case IX86_BUILTIN_MAXPD512: |
| 20105 | case IX86_BUILTIN_MAXPS128_MASK: |
| 20106 | case IX86_BUILTIN_MAXPD128_MASK: |
| 20107 | case IX86_BUILTIN_MAXPS256_MASK: |
| 20108 | case IX86_BUILTIN_MAXPD256_MASK: |
| 20109 | case IX86_BUILTIN_MAXPH128_MASK: |
| 20110 | case IX86_BUILTIN_MAXPH256_MASK: |
| 20111 | case IX86_BUILTIN_MAXPH512_MASK: |
| 20112 | tcode = GT_EXPR; |
| 20113 | do_minmax: |
| 20114 | gcc_assert (n_args >= 2); |
| 20115 | /* Without SSE4.1 we often aren't able to pattern match it back to the |
| 20116 | desired instruction. */ |
| 20117 | if (!gimple_call_lhs (gs: stmt) || !optimize || !TARGET_SSE4_1) |
| 20118 | break; |
| 20119 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 20120 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 20121 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
| 20122 | /* For masked minmax, only optimize if the mask is all ones. */ |
| 20123 | if (n_args > 2 |
| 20124 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: 3))) |
| 20125 | break; |
| 20126 | if (n_args >= 5) |
| 20127 | { |
| 20128 | tree arg4 = gimple_call_arg (gs: stmt, index: 4); |
| 20129 | if (!tree_fits_uhwi_p (arg4)) |
| 20130 | break; |
| 20131 | if (tree_to_uhwi (arg4) == 4) |
| 20132 | /* Ok. */; |
| 20133 | else if (tree_to_uhwi (arg4) != 8) |
| 20134 | /* Invalid round argument. */ |
| 20135 | break; |
| 20136 | else if (HONOR_NANS (arg0)) |
| 20137 | /* Lowering to comparison would raise exceptions which |
| 20138 | shouldn't be raised. */ |
| 20139 | break; |
| 20140 | } |
| 20141 | { |
| 20142 | tree type = truth_type_for (TREE_TYPE (arg0)); |
| 20143 | tree cmpres = gimple_build (seq: &stmts, code: tcode, type, ops: arg0, ops: arg1); |
| 20144 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
| 20145 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
| 20146 | VEC_COND_EXPR, cmpres, arg0, arg1); |
| 20147 | gsi_replace (gsi, g, false); |
| 20148 | } |
| 20149 | return true; |
| 20150 | |
| 20151 | default: |
| 20152 | break; |
| 20153 | } |
| 20154 | |
| 20155 | return false; |
| 20156 | } |
| 20157 | |
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  Return a FUNCTION_DECL for
   the SVML routine implementing FN vectorized over TYPE_IN producing
   TYPE_OUT, or NULL_TREE when SVML provides no such routine.  */

tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (node: type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (node: type_in);
  /* The vector variant must preserve both the element mode and the
     number of elements.  */
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      /* Only the V2DF and V4SF entry points exist.  */
      if ((el_mode != DFmode || n != 2)
	  && (el_mode != SFmode || n != 4))
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* Fetch the scalar builtin; its name and argument list are reused
     to construct the vector routine below.  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  /* "log" is special-cased, SVML spells it "Ln"; everything else is
     "vmls<op>4" (float) or "vmld<op>2" (double), where <op> is the
     scalar builtin name with the 10-character "__builtin_" prefix
     stripped.  For the float case the trailing 'f' of the builtin
     name is overwritten by the vector width digit.  */
  if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOGF)
    strcpy (dest: name, src: "vmlsLn4" );
  else if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOG)
    strcpy (dest: name, src: "vmldLn2" );
  else if (n == 4)
    {
      sprintf (s: name, format: "vmls%s" , bname+10);
      name[strlen (s: name)-1] = '4';
    }
  else
    sprintf (s: name, format: "vmld%s2" , bname+10);

  /* Convert to uppercase.  name[4] is the first letter of the
     operation; clearing bit 0x20 upcases an ASCII lowercase letter.  */
  name[4] &= ~0x20;

  /* The vector routine takes as many (vector) arguments as the
     scalar builtin takes scalar ones.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
| 20250 | |
| 20251 | /* Handler for an ACML-style interface to |
| 20252 | a library with vectorized intrinsics. */ |
| 20253 | |
| 20254 | tree |
| 20255 | ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) |
| 20256 | { |
| 20257 | char name[20] = "__vr.._" ; |
| 20258 | tree fntype, new_fndecl, args; |
| 20259 | unsigned arity; |
| 20260 | const char *bname; |
| 20261 | machine_mode el_mode, in_mode; |
| 20262 | int n, in_n; |
| 20263 | |
| 20264 | /* The ACML is 64bits only and suitable for unsafe math only as |
| 20265 | it does not correctly support parts of IEEE with the required |
| 20266 | precision such as denormals. */ |
| 20267 | if (!TARGET_64BIT |
| 20268 | || !flag_unsafe_math_optimizations) |
| 20269 | return NULL_TREE; |
| 20270 | |
| 20271 | el_mode = TYPE_MODE (TREE_TYPE (type_out)); |
| 20272 | n = TYPE_VECTOR_SUBPARTS (node: type_out); |
| 20273 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); |
| 20274 | in_n = TYPE_VECTOR_SUBPARTS (node: type_in); |
| 20275 | if (el_mode != in_mode |
| 20276 | || n != in_n) |
| 20277 | return NULL_TREE; |
| 20278 | |
| 20279 | switch (fn) |
| 20280 | { |
| 20281 | CASE_CFN_SIN: |
| 20282 | CASE_CFN_COS: |
| 20283 | CASE_CFN_EXP: |
| 20284 | CASE_CFN_LOG: |
| 20285 | CASE_CFN_LOG2: |
| 20286 | CASE_CFN_LOG10: |
| 20287 | if (el_mode == DFmode && n == 2) |
| 20288 | { |
| 20289 | name[4] = 'd'; |
| 20290 | name[5] = '2'; |
| 20291 | } |
| 20292 | else if (el_mode == SFmode && n == 4) |
| 20293 | { |
| 20294 | name[4] = 's'; |
| 20295 | name[5] = '4'; |
| 20296 | } |
| 20297 | else |
| 20298 | return NULL_TREE; |
| 20299 | break; |
| 20300 | |
| 20301 | default: |
| 20302 | return NULL_TREE; |
| 20303 | } |
| 20304 | |
| 20305 | tree fndecl = mathfn_built_in (el_mode == DFmode |
| 20306 | ? double_type_node : float_type_node, fn); |
| 20307 | bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); |
| 20308 | sprintf (s: name + 7, format: "%s" , bname+10); |
| 20309 | |
| 20310 | arity = 0; |
| 20311 | for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) |
| 20312 | arity++; |
| 20313 | |
| 20314 | if (arity == 1) |
| 20315 | fntype = build_function_type_list (type_out, type_in, NULL); |
| 20316 | else |
| 20317 | fntype = build_function_type_list (type_out, type_in, type_in, NULL); |
| 20318 | |
| 20319 | /* Build a function declaration for the vectorized function. */ |
| 20320 | new_fndecl = build_decl (BUILTINS_LOCATION, |
| 20321 | FUNCTION_DECL, get_identifier (name), fntype); |
| 20322 | TREE_PUBLIC (new_fndecl) = 1; |
| 20323 | DECL_EXTERNAL (new_fndecl) = 1; |
| 20324 | DECL_IS_NOVOPS (new_fndecl) = 1; |
| 20325 | TREE_READONLY (new_fndecl) = 1; |
| 20326 | |
| 20327 | return new_fndecl; |
| 20328 | } |
| 20329 | |
/* Handler for an AOCL-LibM-style interface to
   a library with vectorized intrinsics.  Return a FUNCTION_DECL for
   the AOCL-LibM routine implementing FN vectorized over TYPE_IN
   producing TYPE_OUT, or NULL_TREE when no such routine exists.  */

tree
ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
{
  /* Routine names have the form "amd_vr<s|d><n>_<op>"; NAME_LEN
     tracks how much of the buffer has been filled so far.  */
  char name[20] = "amd_vr";
  int name_len = 6;
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* AOCL-LibM is 64bits only.  It is also only suitable for unsafe math only
     as it trades off some accuracy for increased performance.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (node: type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (node: type_in);
  /* The vector variant must keep the element mode and count.  */
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  gcc_checking_assert (n > 0);

  /* Decide whether there exists a function for the combination of FN, the mode
     and the vector width.  Return early if it doesn't.  */

  if (el_mode != DFmode && el_mode != SFmode)
    return NULL_TREE;

  /* Supported vector widths for given FN and single/double precision.  Zeros
     are used to fill out unused positions in the arrays.  */
  static const int supported_n[][2][3] = {
    /* Single prec. ,  Double prec.  */
    { {  16,  0,  0 }, {  2,  4,  8 } }, /* TAN.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP2.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG2.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* COS.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* SIN.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* POW.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* ERF.  */
    { {  4,  8, 16 }, {  2,  8,  0 } }, /* ATAN.  */
    { {  4,  8, 16 }, {  2,  0,  0 } }, /* LOG10.  */
    { {  4,  0,  0 }, {  2,  0,  0 } }, /* EXP10.  */
    { {  4,  0,  0 }, {  2,  0,  0 } }, /* LOG1P.  */
    { {  4,  8, 16 }, {  8,  0,  0 } }, /* ASIN.  */
    { {  4, 16,  0 }, {  0,  0,  0 } }, /* ACOS.  */
    { {  4,  8, 16 }, {  0,  0,  0 } }, /* TANH.  */
    { {  4,  0,  0 }, {  0,  0,  0 } }, /* EXPM1.  */
    { {  4,  8,  0 }, {  0,  0,  0 } }, /* COSH.  */
  };

  /* We cannot simply index the supported_n array with FN since multiple FNs
     may correspond to a single operation (see the definitions of these
     CASE_CFN_* macros).  */
  int i;
  switch (fn)
    {
    CASE_CFN_TAN : i = 0; break;
    CASE_CFN_EXP : i = 1; break;
    CASE_CFN_EXP2 : i = 2; break;
    CASE_CFN_LOG : i = 3; break;
    CASE_CFN_LOG2 : i = 4; break;
    CASE_CFN_COS : i = 5; break;
    CASE_CFN_SIN : i = 6; break;
    CASE_CFN_POW : i = 7; break;
    CASE_CFN_ERF : i = 8; break;
    CASE_CFN_ATAN : i = 9; break;
    CASE_CFN_LOG10 : i = 10; break;
    CASE_CFN_EXP10 : i = 11; break;
    CASE_CFN_LOG1P : i = 12; break;
    CASE_CFN_ASIN : i = 13; break;
    CASE_CFN_ACOS : i = 14; break;
    CASE_CFN_TANH : i = 15; break;
    CASE_CFN_EXPM1 : i = 16; break;
    CASE_CFN_COSH : i = 17; break;
    default: return NULL_TREE;
    }

  /* J selects the precision column: 0 for single, 1 for double.  */
  int j = el_mode == DFmode;
  bool n_is_supported = false;
  for (unsigned k = 0; k < 3; k++)
    if (supported_n[i][j][k] == n)
      {
	n_is_supported = true;
	break;
      }
  if (!n_is_supported)
    return NULL_TREE;

  /* Append the precision and the vector width to the function name we are
     constructing.  */
  name[name_len++] = el_mode == DFmode ? 'd' : 's';
  switch (n)
    {
    case 2:
    case 4:
    case 8:
      name[name_len++] = '0' + n;
      break;
    case 16:
      name[name_len++] = '1';
      name[name_len++] = '6';
      break;
    default:
      gcc_unreachable ();
    }
  name[name_len++] = '_';

  /* Append the operation name (steal it from the name of a builtin).
     BNAME + 10 skips the 10-character "__builtin_" prefix.  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (s: name + name_len, format: "%s" , bname + 10);

  /* The vector routine takes as many (vector) arguments as the
     scalar builtin takes scalar ones.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
| 20471 | |
/* Returns a decl of a function that implements scatter store with
   register type VECTYPE and index type INDEX_TYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
				const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  /* Scatter stores require at least AVX512F.  */
  if (!TARGET_AVX512F)
    return NULL_TREE;

  /* Respect tuning: scatters can be disabled per element count when
     emulation is preferable on the target CPU.  */
  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
      ? !TARGET_USE_SCATTER_2PARTS
      : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
	 ? !TARGET_USE_SCATTER_4PARTS
	 : !TARGET_USE_SCATTER_8PARTS))
    return NULL_TREE;

  /* Only SImode and DImode integral (or pointer) indices are
     supported by the instructions.  */
  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  /* The index must fit in a pointer.  */
  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  /* Select the builtin by data mode and index width.  The ALT-named
     variants pair an index width with the opposite-width element
     (judging by the builtin naming) -- NOTE(review): confirm against
     the builtin definitions.  */
  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    /* 256-bit and 128-bit scatters additionally require AVX512VL.  */
    case E_V4DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
      else
	return NULL_TREE;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
      else
	return NULL_TREE;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
      else
	return NULL_TREE;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
      else
	return NULL_TREE;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
      else
	return NULL_TREE;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
      else
	return NULL_TREE;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
      else
	return NULL_TREE;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
      else
	return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return get_ix86_builtin (c: code);
}
| 20582 | |
| 20583 | /* Return true if it is safe to use the rsqrt optabs to optimize |
| 20584 | 1.0/sqrt. */ |
| 20585 | |
| 20586 | static bool |
| 20587 | use_rsqrt_p (machine_mode mode) |
| 20588 | { |
| 20589 | return ((mode == HFmode |
| 20590 | || (TARGET_SSE && TARGET_SSE_MATH)) |
| 20591 | && flag_finite_math_only |
| 20592 | && !flag_trapping_math |
| 20593 | && flag_unsafe_math_optimizations); |
| 20594 | } |
| 20595 | |
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  /* The parallel must supply exactly one selector per element.  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case E_V8DFmode:
    case E_V8DImode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
      for (i = 4; i < 6; ++i)
	{
	  /* Lane 2 (elements 4-5) may only select from itself;
	     imm8 bit I records the in-lane choice.  */
	  if (ipar[i] < 4 || ipar[i] >= 6)
	    return 0;
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  /* Lane 3 (elements 6-7) likewise.  */
	  if (ipar[i] < 6)
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case E_V4DFmode:
    case E_V4DImode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case E_V16SFmode:
    case E_V16SImode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case E_V8SFmode:
    case E_V8SImode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      /* Only the low-lane pattern needs encoding below.  */
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V4SImode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  Each selector takes
	 nelt/2 bits of the immediate: 1 bit per element for the
	 two-element DF forms, 2 bits for the four-element SF forms.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
| 20702 | |
| 20703 | /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by |
| 20704 | the expansion functions to turn the parallel back into a mask. |
| 20705 | The return value is 0 for no match and the imm8+1 for a match. */ |
| 20706 | |
| 20707 | int |
| 20708 | avx_vperm2f128_parallel (rtx par, machine_mode mode) |
| 20709 | { |
| 20710 | unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; |
| 20711 | unsigned mask = 0; |
| 20712 | unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */ |
| 20713 | |
| 20714 | if (XVECLEN (par, 0) != (int) nelt) |
| 20715 | return 0; |
| 20716 | |
| 20717 | /* Validate that all of the elements are constants, and not totally |
| 20718 | out of range. Copy the data into an integral array to make the |
| 20719 | subsequent checks easier. */ |
| 20720 | for (i = 0; i < nelt; ++i) |
| 20721 | { |
| 20722 | rtx er = XVECEXP (par, 0, i); |
| 20723 | unsigned HOST_WIDE_INT ei; |
| 20724 | |
| 20725 | if (!CONST_INT_P (er)) |
| 20726 | return 0; |
| 20727 | ei = INTVAL (er); |
| 20728 | if (ei >= 2 * nelt) |
| 20729 | return 0; |
| 20730 | ipar[i] = ei; |
| 20731 | } |
| 20732 | |
| 20733 | /* Validate that the halves of the permute are halves. */ |
| 20734 | for (i = 0; i < nelt2 - 1; ++i) |
| 20735 | if (ipar[i] + 1 != ipar[i + 1]) |
| 20736 | return 0; |
| 20737 | for (i = nelt2; i < nelt - 1; ++i) |
| 20738 | if (ipar[i] + 1 != ipar[i + 1]) |
| 20739 | return 0; |
| 20740 | |
| 20741 | /* Reconstruct the mask. */ |
| 20742 | for (i = 0; i < 2; ++i) |
| 20743 | { |
| 20744 | unsigned e = ipar[i * nelt2]; |
| 20745 | if (e % nelt2) |
| 20746 | return 0; |
| 20747 | e /= nelt2; |
| 20748 | mask |= e << (i * 4); |
| 20749 | } |
| 20750 | |
| 20751 | /* Make sure success has a non-zero value by adding one. */ |
| 20752 | return mask + 1; |
| 20753 | } |
| 20754 | |
| 20755 | /* Return a mask of VPTERNLOG operands that do not affect output. */ |
| 20756 | |
| 20757 | int |
| 20758 | vpternlog_redundant_operand_mask (rtx pternlog_imm) |
| 20759 | { |
| 20760 | int mask = 0; |
| 20761 | int imm8 = INTVAL (pternlog_imm); |
| 20762 | |
| 20763 | if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F)) |
| 20764 | mask |= 1; |
| 20765 | if (((imm8 >> 2) & 0x33) == (imm8 & 0x33)) |
| 20766 | mask |= 2; |
| 20767 | if (((imm8 >> 1) & 0x55) == (imm8 & 0x55)) |
| 20768 | mask |= 4; |
| 20769 | |
| 20770 | return mask; |
| 20771 | } |
| 20772 | |
/* Eliminate false dependencies on operands that do not affect output
   by substituting other operands of a VPTERNLOG.  */

void
substitute_vpternlog_operands (rtx *operands)
{
  /* Bit K of MASK is set when source operand K+1 has no influence on
     the result, per the immediate in operands[4].  */
  int mask = vpternlog_redundant_operand_mask (pternlog_imm: operands[4]);

  if (mask & 1) /* The first operand is redundant.  */
    operands[1] = operands[2];

  if (mask & 2) /* The second operand is redundant.  */
    operands[2] = operands[1];

  if (mask & 4) /* The third operand is redundant.  */
    operands[3] = operands[1];
  else if (REG_P (operands[3]))
    {
      /* Operand 3 is live and already in a register; reuse it for the
	 redundant slots instead, avoiding a dependency on whatever
	 operands[1]/[2] happened to hold.  */
      if (mask & 1)
	operands[1] = operands[3];
      if (mask & 2)
	operands[2] = operands[3];
    }
}
| 20797 | |
| 20798 | /* Return a register priority for hard reg REGNO. */ |
| 20799 | static int |
| 20800 | ix86_register_priority (int hard_regno) |
| 20801 | { |
| 20802 | /* ebp and r13 as the base always wants a displacement, r12 as the |
| 20803 | base always wants an index. So discourage their usage in an |
| 20804 | address. */ |
| 20805 | if (hard_regno == R12_REG || hard_regno == R13_REG) |
| 20806 | return 0; |
| 20807 | if (hard_regno == BP_REG) |
| 20808 | return 1; |
| 20809 | /* New x86-64 int registers result in bigger code size. Discourage them. */ |
| 20810 | if (REX_INT_REGNO_P (hard_regno)) |
| 20811 | return 2; |
| 20812 | if (REX2_INT_REGNO_P (hard_regno)) |
| 20813 | return 2; |
| 20814 | /* New x86-64 SSE registers result in bigger code size. Discourage them. */ |
| 20815 | if (REX_SSE_REGNO_P (hard_regno)) |
| 20816 | return 2; |
| 20817 | if (EXT_REX_SSE_REGNO_P (hard_regno)) |
| 20818 | return 1; |
| 20819 | /* Usage of AX register results in smaller code. Prefer it. */ |
| 20820 | if (hard_regno == AX_REG) |
| 20821 | return 4; |
| 20822 | return 3; |
| 20823 | } |
| 20824 | |
| 20825 | /* Implement TARGET_PREFERRED_RELOAD_CLASS. |
| 20826 | |
| 20827 | Put float CONST_DOUBLE in the constant pool instead of fp regs. |
| 20828 | QImode must go into class Q_REGS. |
| 20829 | Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and |
| 20830 | movdf to do mem-to-mem moves through integer regs. */ |
| 20831 | |
static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
	  || MAYBE_SSE_CLASS_P (regclass)
	  || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (CONST_DOUBLE_P (x))
    {
      /* General regs can load everything.  */
      if (INTEGER_CLASS_P (regclass))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (IS_STACK_MODE (mode)
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to FP regs.  */
	  if (FLOAT_CLASS_P (regclass))
	    return FLOAT_REGS;
	}

      /* Any other FP constant goes to memory.  */
      return NO_REGS;
    }

  /* Prefer SSE if we can use them for math.  Also allow integer regs
     when moves between register units are cheap.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    {
      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
	  && TARGET_INTER_UNIT_MOVES_TO_VEC
	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
	return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
      else
	return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS or ALL_MASK_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (Q_CLASS_P (regclass))
	return regclass;
      else if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      else if (MASK_CLASS_P (regclass))
	return regclass;
      else
	return NO_REGS;
    }

  /* No restriction applies: CLASS is fine as given.  */
  return regclass;
}
| 20910 | |
| 20911 | /* Discourage putting floating-point values in SSE registers unless |
| 20912 | SSE math is being used, and likewise for the 387 registers. */ |
| 20913 | static reg_class_t |
| 20914 | ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) |
| 20915 | { |
| 20916 | /* Restrict the output reload class to the register bank that we are doing |
| 20917 | math on. If we would like not to return a subset of CLASS, reject this |
| 20918 | alternative: if reload cannot do this, it will still use its choice. */ |
| 20919 | machine_mode mode = GET_MODE (x); |
| 20920 | if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) |
| 20921 | return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; |
| 20922 | |
| 20923 | if (IS_STACK_MODE (mode)) |
| 20924 | return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; |
| 20925 | |
| 20926 | return regclass; |
| 20927 | } |
| 20928 | |
/* Implement TARGET_SECONDARY_RELOAD.  Return the class of registers
   needed as an intermediary when moving X of mode MODE into (IN_P true)
   or out of (IN_P false) a register of class RCLASS; SRI may be filled
   in with a reload icode and extra cost.  Returns NO_REGS when no
   intermediate register class is required.  */

static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
	   && INTEGER_CLASS_P (rclass)
	   && MAYBE_NON_Q_CLASS_P (rclass))
	  || (!TARGET_AVX512DQ
	      && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;

      /* Operand already in a register: no intermediate needed.  */
      return NO_REGS;
    }

  /* Require movement to gpr, and then store to memory.  */
  if ((mode == HFmode || mode == HImode || mode == V2QImode
       || mode == BFmode)
      && !TARGET_SSE4_1
      && SSE_CLASS_P (rclass)
      && !in_p && MEM_P (x))
    {
      sri->extra_cost = 1;
      return GENERAL_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
	  (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
				       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
	   (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
| 21020 | |
| 21021 | /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ |
| 21022 | |
| 21023 | static bool |
| 21024 | ix86_class_likely_spilled_p (reg_class_t rclass) |
| 21025 | { |
| 21026 | switch (rclass) |
| 21027 | { |
| 21028 | case AREG: |
| 21029 | case DREG: |
| 21030 | case CREG: |
| 21031 | case BREG: |
| 21032 | case AD_REGS: |
| 21033 | case SIREG: |
| 21034 | case DIREG: |
| 21035 | case SSE_FIRST_REG: |
| 21036 | case FP_TOP_REG: |
| 21037 | case FP_SECOND_REG: |
| 21038 | return true; |
| 21039 | |
| 21040 | default: |
| 21041 | break; |
| 21042 | } |
| 21043 | |
| 21044 | return false; |
| 21045 | } |
| 21046 | |
| 21047 | /* Implement TARGET_CALLEE_SAVE_COST. */ |
| 21048 | |
| 21049 | static int |
| 21050 | ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode, |
| 21051 | unsigned int, int mem_cost, const HARD_REG_SET &, bool) |
| 21052 | { |
| 21053 | /* Account for the fact that push and pop are shorter and do their |
| 21054 | own allocation and deallocation. */ |
| 21055 | if (GENERAL_REGNO_P (hard_regno)) |
| 21056 | { |
| 21057 | /* push is 1 byte while typical spill is 4-5 bytes. |
| 21058 | ??? We probably should adjust size costs accordingly. |
| 21059 | Costs are relative to reg-reg move that has 2 bytes for 32bit |
| 21060 | and 3 bytes otherwise. Be sure that no cost table sets cost |
| 21061 | to 2, so we end up with 0. */ |
| 21062 | if (mem_cost <= 2 || optimize_function_for_size_p (cfun)) |
| 21063 | return 1; |
| 21064 | return mem_cost - 2; |
| 21065 | } |
| 21066 | return mem_cost; |
| 21067 | } |
| 21068 | |
| 21069 | /* Return true if a set of DST by the expression SRC should be allowed. |
| 21070 | This prevents complex sets of likely_spilled hard regs before split1. */ |
| 21071 | |
| 21072 | bool |
| 21073 | ix86_hardreg_mov_ok (rtx dst, rtx src) |
| 21074 | { |
| 21075 | /* Avoid complex sets of likely_spilled hard registers before reload. */ |
| 21076 | if (REG_P (dst) && HARD_REGISTER_P (dst) |
| 21077 | && !REG_P (src) && !MEM_P (src) |
| 21078 | && !(VECTOR_MODE_P (GET_MODE (dst)) |
| 21079 | ? standard_sse_constant_p (x: src, GET_MODE (dst)) |
| 21080 | : x86_64_immediate_operand (src, GET_MODE (dst))) |
| 21081 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))) |
| 21082 | && ix86_pre_reload_split ()) |
| 21083 | return false; |
| 21084 | return true; |
| 21085 | } |
| 21086 | |
| 21087 | /* If we are copying between registers from different register sets |
| 21088 | (e.g. FP and integer), we may need a memory location. |
| 21089 | |
| 21090 | The function can't work reliably when one of the CLASSES is a class |
| 21091 | containing registers from multiple sets. We avoid this by never combining |
| 21092 | different sets in a single alternative in the machine description. |
| 21093 | Ensure that this constraint holds to avoid unexpected surprises. |
| 21094 | |
| 21095 | When STRICT is false, we are being called from REGISTER_MOVE_COST, |
| 21096 | so do not enforce these sanity checks. |
| 21097 | |
| 21098 | To optimize register_move_cost performance, define inline variant. */ |
| 21099 | |
static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
				reg_class_t class2, int strict)
{
  /* NO_REGS cannot participate in a move; nothing to do during LRA.  */
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  /* A class mixing registers from different sets (MAYBE_* true but the
     exact-class predicate false) violates our single-set-per-alternative
     invariant; see the function comment above.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  /* x87 <-> anything else always goes through memory.  */
  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* Direct SSE moves exist only to/from general registers.  */
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* With SSE4.1, *mov{ti,di}_internal supports moves between
	 SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}.  */
      if (TARGET_SSE4_1
	  && (TARGET_64BIT ? mode == TImode : mode == DImode))
	return false;

      int msize = GET_MODE_SIZE (mode);

      /* Between SSE and general, we have moves no larger than word size.  */
      if (msize > UNITS_PER_WORD)
	return true;

      /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
	 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
      int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);

      if (msize < minsize)
	return true;
    }

  return false;
}
| 21174 | |
| 21175 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ |
| 21176 | |
| 21177 | static bool |
| 21178 | ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, |
| 21179 | reg_class_t class2) |
| 21180 | { |
| 21181 | return inline_secondary_memory_needed (mode, class1, class2, strict: true); |
| 21182 | } |
| 21183 | |
| 21184 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. |
| 21185 | |
| 21186 | get_secondary_mem widens integral modes to BITS_PER_WORD. |
| 21187 | There is no need to emit full 64 bit move on 64 bit targets |
| 21188 | for integral modes that can be moved using 32 bit move. */ |
| 21189 | |
| 21190 | static machine_mode |
| 21191 | ix86_secondary_memory_needed_mode (machine_mode mode) |
| 21192 | { |
| 21193 | if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) |
| 21194 | return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); |
| 21195 | return mode; |
| 21196 | } |
| 21197 | |
| 21198 | /* Implement the TARGET_CLASS_MAX_NREGS hook. |
| 21199 | |
| 21200 | On the 80386, this is the size of MODE in words, |
| 21201 | except in the FP regs, where a single reg is always enough. */ |
| 21202 | |
| 21203 | static unsigned char |
| 21204 | ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) |
| 21205 | { |
| 21206 | if (MAYBE_INTEGER_CLASS_P (rclass)) |
| 21207 | { |
| 21208 | if (mode == XFmode) |
| 21209 | return (TARGET_64BIT ? 2 : 3); |
| 21210 | else if (mode == XCmode) |
| 21211 | return (TARGET_64BIT ? 4 : 6); |
| 21212 | else |
| 21213 | return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
| 21214 | } |
| 21215 | else |
| 21216 | { |
| 21217 | if (COMPLEX_MODE_P (mode)) |
| 21218 | return 2; |
| 21219 | else |
| 21220 | return 1; |
| 21221 | } |
| 21222 | } |
| 21223 | |
| 21224 | /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ |
| 21225 | |
| 21226 | static bool |
| 21227 | ix86_can_change_mode_class (machine_mode from, machine_mode to, |
| 21228 | reg_class_t regclass) |
| 21229 | { |
| 21230 | if (from == to) |
| 21231 | return true; |
| 21232 | |
| 21233 | /* x87 registers can't do subreg at all, as all values are reformatted |
| 21234 | to extended precision. |
| 21235 | |
| 21236 | ??? middle-end queries mode changes for ALL_REGS and this makes |
| 21237 | vec_series_lowpart_p to always return false. We probably should |
| 21238 | restrict this to modes supported by i387 and check if it is enabled. */ |
| 21239 | if (MAYBE_FLOAT_CLASS_P (regclass)) |
| 21240 | return false; |
| 21241 | |
| 21242 | if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) |
| 21243 | { |
| 21244 | /* Vector registers do not support QI or HImode loads. If we don't |
| 21245 | disallow a change to these modes, reload will assume it's ok to |
| 21246 | drop the subreg from (subreg:SI (reg:HI 100) 0). This affects |
| 21247 | the vec_dupv4hi pattern. |
| 21248 | NB: SSE2 can load 16bit data to sse register via pinsrw. */ |
| 21249 | int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4; |
| 21250 | if (GET_MODE_SIZE (from) < mov_size |
| 21251 | || GET_MODE_SIZE (to) < mov_size) |
| 21252 | return false; |
| 21253 | } |
| 21254 | |
| 21255 | return true; |
| 21256 | } |
| 21257 | |
| 21258 | /* Return index of MODE in the sse load/store tables. */ |
| 21259 | |
| 21260 | static inline int |
| 21261 | sse_store_index (machine_mode mode) |
| 21262 | { |
| 21263 | /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store |
| 21264 | costs to processor_costs, which requires changes to all entries in |
| 21265 | processor cost table. */ |
| 21266 | if (mode == E_HFmode) |
| 21267 | mode = E_SFmode; |
| 21268 | |
| 21269 | switch (GET_MODE_SIZE (mode)) |
| 21270 | { |
| 21271 | case 4: |
| 21272 | return 0; |
| 21273 | case 8: |
| 21274 | return 1; |
| 21275 | case 16: |
| 21276 | return 2; |
| 21277 | case 32: |
| 21278 | return 3; |
| 21279 | case 64: |
| 21280 | return 4; |
| 21281 | default: |
| 21282 | return -1; |
| 21283 | } |
| 21284 | } |
| 21285 | |
| 21286 | /* Return the cost of moving data of mode M between a |
| 21287 | register and memory. A value of 2 is the default; this cost is |
| 21288 | relative to those in `REGISTER_MOVE_COST'. |
| 21289 | |
| 21290 | This function is used extensively by register_move_cost that is used to |
| 21291 | build tables at startup. Make it inline in this case. |
| 21292 | When IN is 2, return maximum of in and out move cost. |
| 21293 | |
| 21294 | If moving between registers and memory is more expensive than |
| 21295 | between two registers, you should define this macro to express the |
| 21296 | relative cost. |
| 21297 | |
| 21298 | Model also increased moving costs of QImode registers in non |
| 21299 | Q_REGS classes. |
| 21300 | */ |
| 21301 | static inline int |
| 21302 | inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) |
| 21303 | { |
| 21304 | int cost; |
| 21305 | |
| 21306 | if (FLOAT_CLASS_P (regclass)) |
| 21307 | { |
| 21308 | int index; |
| 21309 | switch (mode) |
| 21310 | { |
| 21311 | case E_SFmode: |
| 21312 | index = 0; |
| 21313 | break; |
| 21314 | case E_DFmode: |
| 21315 | index = 1; |
| 21316 | break; |
| 21317 | case E_XFmode: |
| 21318 | index = 2; |
| 21319 | break; |
| 21320 | default: |
| 21321 | return 100; |
| 21322 | } |
| 21323 | if (in == 2) |
| 21324 | return MAX (ix86_cost->hard_register.fp_load [index], |
| 21325 | ix86_cost->hard_register.fp_store [index]); |
| 21326 | return in ? ix86_cost->hard_register.fp_load [index] |
| 21327 | : ix86_cost->hard_register.fp_store [index]; |
| 21328 | } |
| 21329 | if (SSE_CLASS_P (regclass)) |
| 21330 | { |
| 21331 | int index = sse_store_index (mode); |
| 21332 | if (index == -1) |
| 21333 | return 100; |
| 21334 | if (in == 2) |
| 21335 | return MAX (ix86_cost->hard_register.sse_load [index], |
| 21336 | ix86_cost->hard_register.sse_store [index]); |
| 21337 | return in ? ix86_cost->hard_register.sse_load [index] |
| 21338 | : ix86_cost->hard_register.sse_store [index]; |
| 21339 | } |
| 21340 | if (MASK_CLASS_P (regclass)) |
| 21341 | { |
| 21342 | int index; |
| 21343 | switch (GET_MODE_SIZE (mode)) |
| 21344 | { |
| 21345 | case 1: |
| 21346 | index = 0; |
| 21347 | break; |
| 21348 | case 2: |
| 21349 | index = 1; |
| 21350 | break; |
| 21351 | /* DImode loads and stores assumed to cost the same as SImode. */ |
| 21352 | case 4: |
| 21353 | case 8: |
| 21354 | index = 2; |
| 21355 | break; |
| 21356 | default: |
| 21357 | return 100; |
| 21358 | } |
| 21359 | |
| 21360 | if (in == 2) |
| 21361 | return MAX (ix86_cost->hard_register.mask_load[index], |
| 21362 | ix86_cost->hard_register.mask_store[index]); |
| 21363 | return in ? ix86_cost->hard_register.mask_load[2] |
| 21364 | : ix86_cost->hard_register.mask_store[2]; |
| 21365 | } |
| 21366 | if (MMX_CLASS_P (regclass)) |
| 21367 | { |
| 21368 | int index; |
| 21369 | switch (GET_MODE_SIZE (mode)) |
| 21370 | { |
| 21371 | case 4: |
| 21372 | index = 0; |
| 21373 | break; |
| 21374 | case 8: |
| 21375 | index = 1; |
| 21376 | break; |
| 21377 | default: |
| 21378 | return 100; |
| 21379 | } |
| 21380 | if (in == 2) |
| 21381 | return MAX (ix86_cost->hard_register.mmx_load [index], |
| 21382 | ix86_cost->hard_register.mmx_store [index]); |
| 21383 | return in ? ix86_cost->hard_register.mmx_load [index] |
| 21384 | : ix86_cost->hard_register.mmx_store [index]; |
| 21385 | } |
| 21386 | switch (GET_MODE_SIZE (mode)) |
| 21387 | { |
| 21388 | case 1: |
| 21389 | if (Q_CLASS_P (regclass) || TARGET_64BIT) |
| 21390 | { |
| 21391 | if (!in) |
| 21392 | return ix86_cost->hard_register.int_store[0]; |
| 21393 | if (TARGET_PARTIAL_REG_DEPENDENCY |
| 21394 | && optimize_function_for_speed_p (cfun)) |
| 21395 | cost = ix86_cost->hard_register.movzbl_load; |
| 21396 | else |
| 21397 | cost = ix86_cost->hard_register.int_load[0]; |
| 21398 | if (in == 2) |
| 21399 | return MAX (cost, ix86_cost->hard_register.int_store[0]); |
| 21400 | return cost; |
| 21401 | } |
| 21402 | else |
| 21403 | { |
| 21404 | if (in == 2) |
| 21405 | return MAX (ix86_cost->hard_register.movzbl_load, |
| 21406 | ix86_cost->hard_register.int_store[0] + 4); |
| 21407 | if (in) |
| 21408 | return ix86_cost->hard_register.movzbl_load; |
| 21409 | else |
| 21410 | return ix86_cost->hard_register.int_store[0] + 4; |
| 21411 | } |
| 21412 | break; |
| 21413 | case 2: |
| 21414 | { |
| 21415 | int cost; |
| 21416 | if (in == 2) |
| 21417 | cost = MAX (ix86_cost->hard_register.int_load[1], |
| 21418 | ix86_cost->hard_register.int_store[1]); |
| 21419 | else |
| 21420 | cost = in ? ix86_cost->hard_register.int_load[1] |
| 21421 | : ix86_cost->hard_register.int_store[1]; |
| 21422 | |
| 21423 | if (mode == E_HFmode) |
| 21424 | { |
| 21425 | /* Prefer SSE over GPR for HFmode. */ |
| 21426 | int sse_cost; |
| 21427 | int index = sse_store_index (mode); |
| 21428 | if (in == 2) |
| 21429 | sse_cost = MAX (ix86_cost->hard_register.sse_load[index], |
| 21430 | ix86_cost->hard_register.sse_store[index]); |
| 21431 | else |
| 21432 | sse_cost = (in |
| 21433 | ? ix86_cost->hard_register.sse_load [index] |
| 21434 | : ix86_cost->hard_register.sse_store [index]); |
| 21435 | if (sse_cost >= cost) |
| 21436 | cost = sse_cost + 1; |
| 21437 | } |
| 21438 | return cost; |
| 21439 | } |
| 21440 | default: |
| 21441 | if (in == 2) |
| 21442 | cost = MAX (ix86_cost->hard_register.int_load[2], |
| 21443 | ix86_cost->hard_register.int_store[2]); |
| 21444 | else if (in) |
| 21445 | cost = ix86_cost->hard_register.int_load[2]; |
| 21446 | else |
| 21447 | cost = ix86_cost->hard_register.int_store[2]; |
| 21448 | /* Multiply with the number of GPR moves needed. */ |
| 21449 | return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); |
| 21450 | } |
| 21451 | } |
| 21452 | |
| 21453 | static int |
| 21454 | ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) |
| 21455 | { |
| 21456 | return inline_memory_move_cost (mode, regclass: (enum reg_class) regclass, in: in ? 1 : 0); |
| 21457 | } |
| 21458 | |
| 21459 | |
| 21460 | /* Return the cost of moving data from a register in class CLASS1 to |
| 21461 | one in class CLASS2. |
| 21462 | |
| 21463 | It is not required that the cost always equal 2 when FROM is the same as TO; |
| 21464 | on some machines it is expensive to move between registers if they are not |
| 21465 | general registers. */ |
| 21466 | |
| 21467 | static int |
| 21468 | ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, |
| 21469 | reg_class_t class2_i) |
| 21470 | { |
| 21471 | enum reg_class class1 = (enum reg_class) class1_i; |
| 21472 | enum reg_class class2 = (enum reg_class) class2_i; |
| 21473 | |
| 21474 | /* In case we require secondary memory, compute cost of the store followed |
| 21475 | by load. In order to avoid bad register allocation choices, we need |
| 21476 | for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ |
| 21477 | |
| 21478 | if (inline_secondary_memory_needed (mode, class1, class2, strict: false)) |
| 21479 | { |
| 21480 | int cost = 1; |
| 21481 | |
| 21482 | cost += inline_memory_move_cost (mode, regclass: class1, in: 2); |
| 21483 | cost += inline_memory_move_cost (mode, regclass: class2, in: 2); |
| 21484 | |
| 21485 | /* In case of copying from general_purpose_register we may emit multiple |
| 21486 | stores followed by single load causing memory size mismatch stall. |
| 21487 | Count this as arbitrarily high cost of 20. */ |
| 21488 | if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD |
| 21489 | && TARGET_MEMORY_MISMATCH_STALL |
| 21490 | && targetm.class_max_nregs (class1, mode) |
| 21491 | > targetm.class_max_nregs (class2, mode)) |
| 21492 | cost += 20; |
| 21493 | |
| 21494 | /* In the case of FP/MMX moves, the registers actually overlap, and we |
| 21495 | have to switch modes in order to treat them differently. */ |
| 21496 | if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) |
| 21497 | || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) |
| 21498 | cost += 20; |
| 21499 | |
| 21500 | return cost; |
| 21501 | } |
| 21502 | |
| 21503 | /* Moves between MMX and non-MMX units require secondary memory. */ |
| 21504 | if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) |
| 21505 | gcc_unreachable (); |
| 21506 | |
| 21507 | if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) |
| 21508 | return (SSE_CLASS_P (class1) |
| 21509 | ? ix86_cost->hard_register.sse_to_integer |
| 21510 | : ix86_cost->hard_register.integer_to_sse); |
| 21511 | |
| 21512 | /* Moves between mask register and GPR. */ |
| 21513 | if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) |
| 21514 | { |
| 21515 | return (MASK_CLASS_P (class1) |
| 21516 | ? ix86_cost->hard_register.mask_to_integer |
| 21517 | : ix86_cost->hard_register.integer_to_mask); |
| 21518 | } |
| 21519 | /* Moving between mask registers. */ |
| 21520 | if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2)) |
| 21521 | return ix86_cost->hard_register.mask_move; |
| 21522 | |
| 21523 | if (MAYBE_FLOAT_CLASS_P (class1)) |
| 21524 | return ix86_cost->hard_register.fp_move; |
| 21525 | if (MAYBE_SSE_CLASS_P (class1)) |
| 21526 | { |
| 21527 | if (GET_MODE_BITSIZE (mode) <= 128) |
| 21528 | return ix86_cost->hard_register.xmm_move; |
| 21529 | if (GET_MODE_BITSIZE (mode) <= 256) |
| 21530 | return ix86_cost->hard_register.ymm_move; |
| 21531 | return ix86_cost->hard_register.zmm_move; |
| 21532 | } |
| 21533 | if (MAYBE_MMX_CLASS_P (class1)) |
| 21534 | return ix86_cost->hard_register.mmx_move; |
| 21535 | return 2; |
| 21536 | } |
| 21537 | |
| 21538 | /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in |
| 21539 | words of a value of mode MODE but can be less for certain modes in |
| 21540 | special long registers. |
| 21541 | |
| 21542 | Actually there are no two word move instructions for consecutive |
| 21543 | registers. And only registers 0-3 may have mov byte instructions |
| 21544 | applied to them. */ |
| 21545 | |
| 21546 | static unsigned int |
| 21547 | ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) |
| 21548 | { |
| 21549 | if (GENERAL_REGNO_P (regno)) |
| 21550 | { |
| 21551 | if (mode == XFmode) |
| 21552 | return TARGET_64BIT ? 2 : 3; |
| 21553 | if (mode == XCmode) |
| 21554 | return TARGET_64BIT ? 4 : 6; |
| 21555 | return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
| 21556 | } |
| 21557 | if (COMPLEX_MODE_P (mode)) |
| 21558 | return 2; |
| 21559 | /* Register pair for mask registers. */ |
| 21560 | if (mode == P2QImode || mode == P2HImode) |
| 21561 | return 2; |
| 21562 | |
| 21563 | return 1; |
| 21564 | } |
| 21565 | |
| 21566 | /* Implement REGMODE_NATURAL_SIZE(MODE). */ |
| 21567 | unsigned int |
| 21568 | ix86_regmode_natural_size (machine_mode mode) |
| 21569 | { |
| 21570 | if (mode == P2HImode || mode == P2QImode) |
| 21571 | return GET_MODE_SIZE (mode) / 2; |
| 21572 | return UNITS_PER_WORD; |
| 21573 | } |
| 21574 | |
/* Implement TARGET_HARD_REGNO_MODE_OK.  Return true if it is OK to
   store a value of mode MODE in hard register REGNO.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  /* x87 stack registers hold only x87 floating-point modes.  */
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
	return MASK_PAIR_REGNO_P(regno);

      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
	      || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
    }

  /* Partial-integer modes other than the mask-pair modes accepted
     above are not valid in any register.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (VALID_AVX256_REG_OR_OI_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode)
	   || VALID_FP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
| 21672 | |
| 21673 | /* Implement TARGET_INSN_CALLEE_ABI. */ |
| 21674 | |
| 21675 | const predefined_function_abi & |
| 21676 | ix86_insn_callee_abi (const rtx_insn *insn) |
| 21677 | { |
| 21678 | unsigned int abi_id = 0; |
| 21679 | rtx pat = PATTERN (insn); |
| 21680 | if (vzeroupper_pattern (pat, VOIDmode)) |
| 21681 | abi_id = ABI_VZEROUPPER; |
| 21682 | |
| 21683 | return function_abis[abi_id]; |
| 21684 | } |
| 21685 | |
| 21686 | /* Initialize function_abis with corresponding abi_id, |
| 21687 | currently only handle vzeroupper. */ |
| 21688 | void |
| 21689 | ix86_initialize_callee_abi (unsigned int abi_id) |
| 21690 | { |
| 21691 | gcc_assert (abi_id == ABI_VZEROUPPER); |
| 21692 | predefined_function_abi &vzeroupper_abi = function_abis[abi_id]; |
| 21693 | if (!vzeroupper_abi.initialized_p ()) |
| 21694 | { |
| 21695 | HARD_REG_SET full_reg_clobbers; |
| 21696 | CLEAR_HARD_REG_SET (set&: full_reg_clobbers); |
| 21697 | vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers); |
| 21698 | } |
| 21699 | } |
| 21700 | |
| 21701 | void |
| 21702 | ix86_expand_avx_vzeroupper (void) |
| 21703 | { |
| 21704 | /* Initialize vzeroupper_abi here. */ |
| 21705 | ix86_initialize_callee_abi (ABI_VZEROUPPER); |
| 21706 | rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ()); |
| 21707 | /* Return false for non-local goto in can_nonlocal_goto. */ |
| 21708 | make_reg_eh_region_note (insn, ecf_flags: 0, INT_MIN); |
| 21709 | /* Flag used for call_insn indicates it's a fake call. */ |
| 21710 | RTX_FLAG (insn, used) = 1; |
| 21711 | } |
| 21712 | |
| 21713 | |
| 21714 | /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that |
| 21715 | saves SSE registers across calls is Win64 (thus no need to check the |
| 21716 | current ABI here), and with AVX enabled Win64 only guarantees that |
| 21717 | the low 16 bytes are saved. */ |
| 21718 | |
| 21719 | static bool |
| 21720 | ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno, |
| 21721 | machine_mode mode) |
| 21722 | { |
| 21723 | /* Special ABI for vzeroupper which only clobber higher part of sse regs. */ |
| 21724 | if (abi_id == ABI_VZEROUPPER) |
| 21725 | return (GET_MODE_SIZE (mode) > 16 |
| 21726 | && ((TARGET_64BIT && REX_SSE_REGNO_P (regno)) |
| 21727 | || LEGACY_SSE_REGNO_P (regno))); |
| 21728 | |
| 21729 | return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; |
| 21730 | } |
| 21731 | |
| 21732 | /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a |
| 21733 | tieable integer mode. */ |
| 21734 | |
| 21735 | static bool |
| 21736 | ix86_tieable_integer_mode_p (machine_mode mode) |
| 21737 | { |
| 21738 | switch (mode) |
| 21739 | { |
| 21740 | case E_HImode: |
| 21741 | case E_SImode: |
| 21742 | return true; |
| 21743 | |
| 21744 | case E_QImode: |
| 21745 | return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; |
| 21746 | |
| 21747 | case E_DImode: |
| 21748 | return TARGET_64BIT; |
| 21749 | |
| 21750 | default: |
| 21751 | return false; |
| 21752 | } |
| 21753 | } |
| 21754 | |
| 21755 | /* Implement TARGET_MODES_TIEABLE_P. |
| 21756 | |
| 21757 | Return true if MODE1 is accessible in a register that can hold MODE2 |
| 21758 | without copying. That is, all register classes that can hold MODE2 |
| 21759 | can also hold MODE1. */ |
| 21760 | |
| 21761 | static bool |
| 21762 | ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) |
| 21763 | { |
| 21764 | if (mode1 == mode2) |
| 21765 | return true; |
| 21766 | |
| 21767 | if (ix86_tieable_integer_mode_p (mode: mode1) |
| 21768 | && ix86_tieable_integer_mode_p (mode: mode2)) |
| 21769 | return true; |
| 21770 | |
| 21771 | /* MODE2 being XFmode implies fp stack or general regs, which means we |
| 21772 | can tie any smaller floating point modes to it. Note that we do not |
| 21773 | tie this with TFmode. */ |
| 21774 | if (mode2 == XFmode) |
| 21775 | return mode1 == SFmode || mode1 == DFmode; |
| 21776 | |
| 21777 | /* MODE2 being DFmode implies fp stack, general or sse regs, which means |
| 21778 | that we can tie it with SFmode. */ |
| 21779 | if (mode2 == DFmode) |
| 21780 | return mode1 == SFmode; |
| 21781 | |
| 21782 | /* If MODE2 is only appropriate for an SSE register, then tie with |
| 21783 | any vector modes or scalar floating point modes acceptable to SSE |
| 21784 | registers, excluding scalar integer modes with SUBREG: |
| 21785 | (subreg:QI (reg:TI 99) 0)) |
| 21786 | (subreg:HI (reg:TI 99) 0)) |
| 21787 | (subreg:SI (reg:TI 99) 0)) |
| 21788 | (subreg:DI (reg:TI 99) 0)) |
| 21789 | to avoid unnecessary move from SSE register to integer register. |
| 21790 | */ |
| 21791 | if (GET_MODE_SIZE (mode2) >= 16 |
| 21792 | && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2) |
| 21793 | || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1)) |
| 21794 | && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2))) |
| 21795 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode2)) |
| 21796 | return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode1); |
| 21797 | |
| 21798 | /* If MODE2 is appropriate for an MMX register, then tie |
| 21799 | with any other mode acceptable to MMX registers. */ |
| 21800 | if (GET_MODE_SIZE (mode2) == 8 |
| 21801 | && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode: mode2)) |
| 21802 | return (GET_MODE_SIZE (mode1) == 8 |
| 21803 | && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode: mode1)); |
| 21804 | |
| 21805 | /* SCmode and DImode can be tied. */ |
| 21806 | if ((mode1 == E_SCmode && mode2 == E_DImode) |
| 21807 | || (mode1 == E_DImode && mode2 == E_SCmode)) |
| 21808 | return TARGET_64BIT; |
| 21809 | |
| 21810 | /* [SD]Cmode and V2[SD]Fmode modes can be tied. */ |
| 21811 | if ((mode1 == E_SCmode && mode2 == E_V2SFmode) |
| 21812 | || (mode1 == E_V2SFmode && mode2 == E_SCmode) |
| 21813 | || (mode1 == E_DCmode && mode2 == E_V2DFmode) |
| 21814 | || (mode1 == E_V2DFmode && mode2 == E_DCmode)) |
| 21815 | return true; |
| 21816 | |
| 21817 | return false; |
| 21818 | } |
| 21819 | |
| 21820 | /* Return the cost of moving between two registers of mode MODE. */ |
| 21821 | |
| 21822 | static int |
| 21823 | ix86_set_reg_reg_cost (machine_mode mode) |
| 21824 | { |
| 21825 | unsigned int units = UNITS_PER_WORD; |
| 21826 | |
| 21827 | switch (GET_MODE_CLASS (mode)) |
| 21828 | { |
| 21829 | default: |
| 21830 | break; |
| 21831 | |
| 21832 | case MODE_CC: |
| 21833 | units = GET_MODE_SIZE (CCmode); |
| 21834 | break; |
| 21835 | |
| 21836 | case MODE_FLOAT: |
| 21837 | if ((TARGET_SSE && mode == TFmode) |
| 21838 | || (TARGET_80387 && mode == XFmode) |
| 21839 | || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) |
| 21840 | || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) |
| 21841 | units = GET_MODE_SIZE (mode); |
| 21842 | break; |
| 21843 | |
| 21844 | case MODE_COMPLEX_FLOAT: |
| 21845 | if ((TARGET_SSE && mode == TCmode) |
| 21846 | || (TARGET_80387 && mode == XCmode) |
| 21847 | || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) |
| 21848 | || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) |
| 21849 | units = GET_MODE_SIZE (mode); |
| 21850 | break; |
| 21851 | |
| 21852 | case MODE_VECTOR_INT: |
| 21853 | case MODE_VECTOR_FLOAT: |
| 21854 | if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) |
| 21855 | || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) |
| 21856 | || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) |
| 21857 | || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) |
| 21858 | || ((TARGET_MMX || TARGET_MMX_WITH_SSE) |
| 21859 | && VALID_MMX_REG_MODE (mode))) |
| 21860 | units = GET_MODE_SIZE (mode); |
| 21861 | } |
| 21862 | |
| 21863 | /* Return the cost of moving between two registers of mode MODE, |
| 21864 | assuming that the move will be in pieces of at most UNITS bytes. */ |
| 21865 | return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); |
| 21866 | } |
| 21867 | |
| 21868 | /* Return cost of vector operation in MODE given that scalar version has |
| 21869 | COST. */ |
| 21870 | |
| 21871 | static int |
| 21872 | ix86_vec_cost (machine_mode mode, int cost) |
| 21873 | { |
| 21874 | if (!VECTOR_MODE_P (mode)) |
| 21875 | return cost; |
| 21876 | |
| 21877 | if (GET_MODE_BITSIZE (mode) == 128 |
| 21878 | && TARGET_SSE_SPLIT_REGS) |
| 21879 | return cost * GET_MODE_BITSIZE (mode) / 64; |
| 21880 | else if (GET_MODE_BITSIZE (mode) > 128 |
| 21881 | && TARGET_AVX256_SPLIT_REGS) |
| 21882 | return cost * GET_MODE_BITSIZE (mode) / 128; |
| 21883 | else if (GET_MODE_BITSIZE (mode) > 256 |
| 21884 | && TARGET_AVX512_SPLIT_REGS) |
| 21885 | return cost * GET_MODE_BITSIZE (mode) / 256; |
| 21886 | return cost; |
| 21887 | } |
| 21888 | |
| 21889 | /* Return cost of vec_widen_<s>mult_hi/lo_<mode>, |
| 21890 | vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2. */ |
| 21891 | static int |
| 21892 | ix86_widen_mult_cost (const struct processor_costs *cost, |
| 21893 | enum machine_mode mode, bool uns_p) |
| 21894 | { |
| 21895 | gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); |
| 21896 | int = 0; |
| 21897 | int basic_cost = 0; |
| 21898 | switch (mode) |
| 21899 | { |
| 21900 | case V8HImode: |
| 21901 | case V16HImode: |
| 21902 | if (!uns_p || mode == V16HImode) |
| 21903 | extra_cost = cost->sse_op * 2; |
| 21904 | basic_cost = cost->mulss * 2 + cost->sse_op * 4; |
| 21905 | break; |
| 21906 | case V4SImode: |
| 21907 | case V8SImode: |
| 21908 | /* pmulhw/pmullw can be used. */ |
| 21909 | basic_cost = cost->mulss * 2 + cost->sse_op * 2; |
| 21910 | break; |
| 21911 | case V2DImode: |
| 21912 | /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend, |
| 21913 | require extra 4 mul, 4 add, 4 cmp and 2 shift. */ |
| 21914 | if (!TARGET_SSE4_1 && !uns_p) |
| 21915 | extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4 |
| 21916 | + cost->sse_op * 2; |
| 21917 | /* Fallthru. */ |
| 21918 | case V4DImode: |
| 21919 | basic_cost = cost->mulss * 2 + cost->sse_op * 4; |
| 21920 | break; |
| 21921 | default: |
| 21922 | /* Not implemented. */ |
| 21923 | return 100; |
| 21924 | } |
| 21925 | return ix86_vec_cost (mode, cost: basic_cost + extra_cost); |
| 21926 | } |
| 21927 | |
| 21928 | /* Return cost of multiplication in MODE. */ |
| 21929 | |
| 21930 | static int |
| 21931 | ix86_multiplication_cost (const struct processor_costs *cost, |
| 21932 | enum machine_mode mode) |
| 21933 | { |
| 21934 | machine_mode inner_mode = mode; |
| 21935 | if (VECTOR_MODE_P (mode)) |
| 21936 | inner_mode = GET_MODE_INNER (mode); |
| 21937 | |
| 21938 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 21939 | return inner_mode == DFmode ? cost->mulsd : cost->mulss; |
| 21940 | else if (X87_FLOAT_MODE_P (mode)) |
| 21941 | return cost->fmul; |
| 21942 | else if (FLOAT_MODE_P (mode)) |
| 21943 | return ix86_vec_cost (mode, |
| 21944 | cost: inner_mode == DFmode ? cost->mulsd : cost->mulss); |
| 21945 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| 21946 | { |
| 21947 | int nmults, nops; |
| 21948 | /* Cost of reading the memory. */ |
| 21949 | int ; |
| 21950 | |
| 21951 | switch (mode) |
| 21952 | { |
| 21953 | case V4QImode: |
| 21954 | case V8QImode: |
| 21955 | /* Partial V*QImode is emulated with 4-6 insns. */ |
| 21956 | nmults = 1; |
| 21957 | nops = 3; |
| 21958 | extra = 0; |
| 21959 | |
| 21960 | if (TARGET_AVX512BW && TARGET_AVX512VL) |
| 21961 | ; |
| 21962 | else if (TARGET_AVX2) |
| 21963 | nops += 2; |
| 21964 | else if (TARGET_XOP) |
| 21965 | extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; |
| 21966 | else |
| 21967 | { |
| 21968 | nops += 1; |
| 21969 | extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; |
| 21970 | } |
| 21971 | goto do_qimode; |
| 21972 | |
| 21973 | case V16QImode: |
| 21974 | /* V*QImode is emulated with 4-11 insns. */ |
| 21975 | nmults = 1; |
| 21976 | nops = 3; |
| 21977 | extra = 0; |
| 21978 | |
| 21979 | if (TARGET_AVX2 && !TARGET_PREFER_AVX128) |
| 21980 | { |
| 21981 | if (!(TARGET_AVX512BW && TARGET_AVX512VL)) |
| 21982 | nops += 3; |
| 21983 | } |
| 21984 | else if (TARGET_XOP) |
| 21985 | { |
| 21986 | nmults += 1; |
| 21987 | nops += 2; |
| 21988 | extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; |
| 21989 | } |
| 21990 | else |
| 21991 | { |
| 21992 | nmults += 1; |
| 21993 | nops += 4; |
| 21994 | extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; |
| 21995 | } |
| 21996 | goto do_qimode; |
| 21997 | |
| 21998 | case V32QImode: |
| 21999 | nmults = 1; |
| 22000 | nops = 3; |
| 22001 | extra = 0; |
| 22002 | |
| 22003 | if (!TARGET_AVX512BW || TARGET_PREFER_AVX256) |
| 22004 | { |
| 22005 | nmults += 1; |
| 22006 | nops += 4; |
| 22007 | /* 2 loads, so no division by 2. */ |
| 22008 | extra += COSTS_N_INSNS (cost->sse_load[3]); |
| 22009 | } |
| 22010 | goto do_qimode; |
| 22011 | |
| 22012 | case V64QImode: |
| 22013 | nmults = 2; |
| 22014 | nops = 9; |
| 22015 | /* 2 loads of each size, so no division by 2. */ |
| 22016 | extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]); |
| 22017 | |
| 22018 | do_qimode: |
| 22019 | return ix86_vec_cost (mode, cost: cost->mulss * nmults |
| 22020 | + cost->sse_op * nops) + extra; |
| 22021 | |
| 22022 | case V4SImode: |
| 22023 | /* pmulld is used in this case. No emulation is needed. */ |
| 22024 | if (TARGET_SSE4_1) |
| 22025 | goto do_native; |
| 22026 | /* V4SImode is emulated with 7 insns. */ |
| 22027 | else |
| 22028 | return ix86_vec_cost (mode, cost: cost->mulss * 2 + cost->sse_op * 5); |
| 22029 | |
| 22030 | case V2DImode: |
| 22031 | case V4DImode: |
| 22032 | /* vpmullq is used in this case. No emulation is needed. */ |
| 22033 | if (TARGET_AVX512DQ && TARGET_AVX512VL) |
| 22034 | goto do_native; |
| 22035 | /* V*DImode is emulated with 6-8 insns. */ |
| 22036 | else if (TARGET_XOP && mode == V2DImode) |
| 22037 | return ix86_vec_cost (mode, cost: cost->mulss * 2 + cost->sse_op * 4); |
| 22038 | /* FALLTHRU */ |
| 22039 | case V8DImode: |
| 22040 | /* vpmullq is used in this case. No emulation is needed. */ |
| 22041 | if (TARGET_AVX512DQ && mode == V8DImode) |
| 22042 | goto do_native; |
| 22043 | else |
| 22044 | return ix86_vec_cost (mode, cost: cost->mulss * 3 + cost->sse_op * 5); |
| 22045 | |
| 22046 | default: |
| 22047 | do_native: |
| 22048 | return ix86_vec_cost (mode, cost: cost->mulss); |
| 22049 | } |
| 22050 | } |
| 22051 | else |
| 22052 | return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7); |
| 22053 | } |
| 22054 | |
| 22055 | /* Return cost of multiplication in MODE. */ |
| 22056 | |
| 22057 | static int |
| 22058 | ix86_division_cost (const struct processor_costs *cost, |
| 22059 | enum machine_mode mode) |
| 22060 | { |
| 22061 | machine_mode inner_mode = mode; |
| 22062 | if (VECTOR_MODE_P (mode)) |
| 22063 | inner_mode = GET_MODE_INNER (mode); |
| 22064 | |
| 22065 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 22066 | return inner_mode == DFmode ? cost->divsd : cost->divss; |
| 22067 | else if (X87_FLOAT_MODE_P (mode)) |
| 22068 | return cost->fdiv; |
| 22069 | else if (FLOAT_MODE_P (mode)) |
| 22070 | return ix86_vec_cost (mode, |
| 22071 | cost: inner_mode == DFmode ? cost->divsd : cost->divss); |
| 22072 | else |
| 22073 | return cost->divide[MODE_INDEX (mode)]; |
| 22074 | } |
| 22075 | |
| 22076 | /* Return cost of shift in MODE. |
| 22077 | If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL. |
| 22078 | AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE |
| 22079 | if op1 is a result of subreg. |
| 22080 | |
| 22081 | SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */ |
| 22082 | |
| 22083 | static int |
| 22084 | ix86_shift_rotate_cost (const struct processor_costs *cost, |
| 22085 | enum rtx_code code, |
| 22086 | enum machine_mode mode, bool constant_op1, |
| 22087 | HOST_WIDE_INT op1_val, |
| 22088 | bool and_in_op1, |
| 22089 | bool shift_and_truncate, |
| 22090 | bool *skip_op0, bool *skip_op1) |
| 22091 | { |
| 22092 | if (skip_op0) |
| 22093 | *skip_op0 = *skip_op1 = false; |
| 22094 | |
| 22095 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| 22096 | { |
| 22097 | int count; |
| 22098 | /* Cost of reading the memory. */ |
| 22099 | int ; |
| 22100 | |
| 22101 | switch (mode) |
| 22102 | { |
| 22103 | case V4QImode: |
| 22104 | case V8QImode: |
| 22105 | if (TARGET_AVX2) |
| 22106 | /* Use vpbroadcast. */ |
| 22107 | extra = cost->sse_op; |
| 22108 | else |
| 22109 | extra = COSTS_N_INSNS (cost->sse_load[2]) / 2; |
| 22110 | |
| 22111 | if (constant_op1) |
| 22112 | { |
| 22113 | if (code == ASHIFTRT) |
| 22114 | { |
| 22115 | count = 4; |
| 22116 | extra *= 2; |
| 22117 | } |
| 22118 | else |
| 22119 | count = 2; |
| 22120 | } |
| 22121 | else if (TARGET_AVX512BW && TARGET_AVX512VL) |
| 22122 | return ix86_vec_cost (mode, cost: cost->sse_op * 4); |
| 22123 | else if (TARGET_SSE4_1) |
| 22124 | count = 5; |
| 22125 | else if (code == ASHIFTRT) |
| 22126 | count = 6; |
| 22127 | else |
| 22128 | count = 5; |
| 22129 | return ix86_vec_cost (mode, cost: cost->sse_op * count) + extra; |
| 22130 | |
| 22131 | case V16QImode: |
| 22132 | if (TARGET_XOP) |
| 22133 | { |
| 22134 | /* For XOP we use vpshab, which requires a broadcast of the |
| 22135 | value to the variable shift insn. For constants this |
| 22136 | means a V16Q const in mem; even when we can perform the |
| 22137 | shift with one insn set the cost to prefer paddb. */ |
| 22138 | if (constant_op1) |
| 22139 | { |
| 22140 | extra = COSTS_N_INSNS (cost->sse_load[2]) / 2; |
| 22141 | return ix86_vec_cost (mode, cost: cost->sse_op) + extra; |
| 22142 | } |
| 22143 | else |
| 22144 | { |
| 22145 | count = (code == ASHIFT) ? 3 : 4; |
| 22146 | return ix86_vec_cost (mode, cost: cost->sse_op * count); |
| 22147 | } |
| 22148 | } |
| 22149 | /* FALLTHRU */ |
| 22150 | case V32QImode: |
| 22151 | if (TARGET_GFNI && constant_op1) |
| 22152 | { |
| 22153 | /* Use vgf2p8affine. One extra load for the mask, but in a loop |
| 22154 | with enough registers it will be moved out. So for now don't |
| 22155 | account the constant mask load. This is not quite right |
| 22156 | for non loop vectorization. */ |
| 22157 | extra = 0; |
| 22158 | return ix86_vec_cost (mode, cost: cost->sse_op) + extra; |
| 22159 | } |
| 22160 | if (TARGET_AVX2) |
| 22161 | /* Use vpbroadcast. */ |
| 22162 | extra = cost->sse_op; |
| 22163 | else |
| 22164 | extra = COSTS_N_INSNS (mode == V16QImode |
| 22165 | ? cost->sse_load[2] |
| 22166 | : cost->sse_load[3]) / 2; |
| 22167 | |
| 22168 | if (constant_op1) |
| 22169 | { |
| 22170 | if (code == ASHIFTRT) |
| 22171 | { |
| 22172 | count = 4; |
| 22173 | extra *= 2; |
| 22174 | } |
| 22175 | else |
| 22176 | count = 2; |
| 22177 | } |
| 22178 | else if (TARGET_AVX512BW |
| 22179 | && ((mode == V32QImode && !TARGET_PREFER_AVX256) |
| 22180 | || (mode == V16QImode && TARGET_AVX512VL |
| 22181 | && !TARGET_PREFER_AVX128))) |
| 22182 | return ix86_vec_cost (mode, cost: cost->sse_op * 4); |
| 22183 | else if (TARGET_AVX2 |
| 22184 | && mode == V16QImode && !TARGET_PREFER_AVX128) |
| 22185 | count = 6; |
| 22186 | else if (TARGET_SSE4_1) |
| 22187 | count = 9; |
| 22188 | else if (code == ASHIFTRT) |
| 22189 | count = 10; |
| 22190 | else |
| 22191 | count = 9; |
| 22192 | return ix86_vec_cost (mode, cost: cost->sse_op * count) + extra; |
| 22193 | |
| 22194 | case V64QImode: |
| 22195 | /* Ignore the mask load for GF2P8AFFINEQB. */ |
| 22196 | extra = 0; |
| 22197 | return ix86_vec_cost (mode, cost: cost->sse_op) + extra; |
| 22198 | |
| 22199 | case V2DImode: |
| 22200 | case V4DImode: |
| 22201 | /* V*DImode arithmetic right shift is emulated. */ |
| 22202 | if (code == ASHIFTRT && !TARGET_AVX512VL) |
| 22203 | { |
| 22204 | if (constant_op1) |
| 22205 | { |
| 22206 | if (op1_val == 63) |
| 22207 | count = TARGET_SSE4_2 ? 1 : 2; |
| 22208 | else if (TARGET_XOP) |
| 22209 | count = 2; |
| 22210 | else if (TARGET_SSE4_1) |
| 22211 | count = 3; |
| 22212 | else |
| 22213 | count = 4; |
| 22214 | } |
| 22215 | else if (TARGET_XOP) |
| 22216 | count = 3; |
| 22217 | else if (TARGET_SSE4_2) |
| 22218 | count = 4; |
| 22219 | else |
| 22220 | count = 5; |
| 22221 | |
| 22222 | return ix86_vec_cost (mode, cost: cost->sse_op * count); |
| 22223 | } |
| 22224 | /* FALLTHRU */ |
| 22225 | default: |
| 22226 | return ix86_vec_cost (mode, cost: cost->sse_op); |
| 22227 | } |
| 22228 | } |
| 22229 | |
| 22230 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 22231 | { |
| 22232 | if (constant_op1) |
| 22233 | { |
| 22234 | if (op1_val > 32) |
| 22235 | return cost->shift_const + COSTS_N_INSNS (2); |
| 22236 | else |
| 22237 | return cost->shift_const * 2; |
| 22238 | } |
| 22239 | else |
| 22240 | { |
| 22241 | if (and_in_op1) |
| 22242 | return cost->shift_var * 2; |
| 22243 | else |
| 22244 | return cost->shift_var * 6 + COSTS_N_INSNS (2); |
| 22245 | } |
| 22246 | } |
| 22247 | else |
| 22248 | { |
| 22249 | if (constant_op1) |
| 22250 | return cost->shift_const; |
| 22251 | else if (shift_and_truncate) |
| 22252 | { |
| 22253 | if (skip_op0) |
| 22254 | *skip_op0 = *skip_op1 = true; |
| 22255 | /* Return the cost after shift-and truncation. */ |
| 22256 | return cost->shift_var; |
| 22257 | } |
| 22258 | else |
| 22259 | return cost->shift_var; |
| 22260 | } |
| 22261 | } |
| 22262 | |
| 22263 | static int |
| 22264 | ix86_insn_cost (rtx_insn *insn, bool speed) |
| 22265 | { |
| 22266 | int insn_cost = 0; |
| 22267 | /* Add extra cost to avoid post_reload late_combine revert |
| 22268 | the optimization did in pass_rpad. */ |
| 22269 | if (reload_completed |
| 22270 | && ix86_rpad_gate () |
| 22271 | && recog_memoized (insn) >= 0 |
| 22272 | && get_attr_avx_partial_xmm_update (insn) |
| 22273 | == AVX_PARTIAL_XMM_UPDATE_TRUE) |
| 22274 | insn_cost += COSTS_N_INSNS (3); |
| 22275 | |
| 22276 | return insn_cost + pattern_cost (PATTERN (insn), speed); |
| 22277 | } |
| 22278 | |
| 22279 | /* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */ |
| 22280 | |
| 22281 | static int |
| 22282 | vec_fp_conversion_cost (const struct processor_costs *cost, int size) |
| 22283 | { |
| 22284 | if (size < 128) |
| 22285 | return cost->cvtss2sd; |
| 22286 | else if (size < 256) |
| 22287 | { |
| 22288 | if (TARGET_SSE_SPLIT_REGS) |
| 22289 | return cost->cvtss2sd * size / 64; |
| 22290 | return cost->cvtss2sd; |
| 22291 | } |
| 22292 | if (size < 512) |
| 22293 | return cost->vcvtps2pd256; |
| 22294 | else |
| 22295 | return cost->vcvtps2pd512; |
| 22296 | } |
| 22297 | |
| 22298 | /* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */ |
| 22299 | |
| 22300 | static bool |
| 22301 | unspec_pcmp_p (rtx x) |
| 22302 | { |
| 22303 | return GET_CODE (x) == UNSPEC |
| 22304 | && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP); |
| 22305 | } |
| 22306 | |
| 22307 | /* Compute a (partial) cost for rtx X. Return true if the complete |
| 22308 | cost has been computed, and false if subexpressions should be |
| 22309 | scanned. In either case, *TOTAL contains the cost result. */ |
| 22310 | |
| 22311 | static bool |
| 22312 | ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, |
| 22313 | int *total, bool speed) |
| 22314 | { |
| 22315 | rtx mask; |
| 22316 | enum rtx_code code = GET_CODE (x); |
| 22317 | enum rtx_code outer_code = (enum rtx_code) outer_code_i; |
| 22318 | const struct processor_costs *cost |
| 22319 | = speed ? ix86_tune_cost : &ix86_size_cost; |
| 22320 | int src_cost; |
| 22321 | |
| 22322 | /* Handling different vternlog variants. */ |
| 22323 | if ((GET_MODE_SIZE (mode) == 64 |
| 22324 | ? TARGET_AVX512F |
| 22325 | : (TARGET_AVX512VL |
| 22326 | || (TARGET_AVX512F && !TARGET_PREFER_AVX256))) |
| 22327 | && GET_MODE_SIZE (mode) >= 16 |
| 22328 | && outer_code_i == SET |
| 22329 | && ternlog_operand (x, mode)) |
| 22330 | { |
| 22331 | rtx args[3]; |
| 22332 | |
| 22333 | args[0] = NULL_RTX; |
| 22334 | args[1] = NULL_RTX; |
| 22335 | args[2] = NULL_RTX; |
| 22336 | int idx = ix86_ternlog_idx (op: x, args); |
| 22337 | gcc_assert (idx >= 0); |
| 22338 | |
| 22339 | *total = cost->sse_op; |
| 22340 | for (int i = 0; i != 3; i++) |
| 22341 | if (args[i]) |
| 22342 | *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed); |
| 22343 | return true; |
| 22344 | } |
| 22345 | |
| 22346 | |
| 22347 | switch (code) |
| 22348 | { |
| 22349 | case SET: |
| 22350 | if (register_operand (SET_DEST (x), VOIDmode) |
| 22351 | && register_operand (SET_SRC (x), VOIDmode)) |
| 22352 | { |
| 22353 | *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); |
| 22354 | return true; |
| 22355 | } |
| 22356 | |
| 22357 | if (register_operand (SET_SRC (x), VOIDmode)) |
| 22358 | /* Avoid potentially incorrect high cost from rtx_costs |
| 22359 | for non-tieable SUBREGs. */ |
| 22360 | src_cost = 0; |
| 22361 | else |
| 22362 | { |
| 22363 | src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed); |
| 22364 | |
| 22365 | if (CONSTANT_P (SET_SRC (x))) |
| 22366 | /* Constant costs assume a base value of COSTS_N_INSNS (1) and add |
| 22367 | a small value, possibly zero for cheap constants. */ |
| 22368 | src_cost += COSTS_N_INSNS (1); |
| 22369 | } |
| 22370 | |
| 22371 | *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed); |
| 22372 | return true; |
| 22373 | |
| 22374 | case CONST_INT: |
| 22375 | case CONST: |
| 22376 | case LABEL_REF: |
| 22377 | case SYMBOL_REF: |
| 22378 | if (x86_64_immediate_operand (x, VOIDmode)) |
| 22379 | *total = 0; |
| 22380 | else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode)) |
| 22381 | /* Consider the zext constants slightly more expensive, as they |
| 22382 | can't appear in most instructions. */ |
| 22383 | *total = 1; |
| 22384 | else |
| 22385 | /* movabsq is slightly more expensive than a simple instruction. */ |
| 22386 | *total = COSTS_N_INSNS (1) + 1; |
| 22387 | return true; |
| 22388 | |
| 22389 | case CONST_DOUBLE: |
| 22390 | if (IS_STACK_MODE (mode)) |
| 22391 | switch (standard_80387_constant_p (x)) |
| 22392 | { |
| 22393 | case -1: |
| 22394 | case 0: |
| 22395 | break; |
| 22396 | case 1: /* 0.0 */ |
| 22397 | *total = 1; |
| 22398 | return true; |
| 22399 | default: /* Other constants */ |
| 22400 | *total = 2; |
| 22401 | return true; |
| 22402 | } |
| 22403 | /* FALLTHRU */ |
| 22404 | |
| 22405 | case CONST_VECTOR: |
| 22406 | switch (standard_sse_constant_p (x, pred_mode: mode)) |
| 22407 | { |
| 22408 | case 0: |
| 22409 | break; |
| 22410 | case 1: /* 0: xor eliminates false dependency */ |
| 22411 | *total = 0; |
| 22412 | return true; |
| 22413 | default: /* -1: cmp contains false dependency */ |
| 22414 | *total = 1; |
| 22415 | return true; |
| 22416 | } |
| 22417 | /* FALLTHRU */ |
| 22418 | |
| 22419 | case CONST_WIDE_INT: |
| 22420 | /* Fall back to (MEM (SYMBOL_REF)), since that's where |
| 22421 | it'll probably end up. Add a penalty for size. */ |
| 22422 | *total = (COSTS_N_INSNS (1) |
| 22423 | + (!TARGET_64BIT && flag_pic) |
| 22424 | + (GET_MODE_SIZE (mode) <= 4 |
| 22425 | ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2)); |
| 22426 | return true; |
| 22427 | |
| 22428 | case ZERO_EXTEND: |
| 22429 | /* The zero extensions is often completely free on x86_64, so make |
| 22430 | it as cheap as possible. */ |
| 22431 | if (TARGET_64BIT && mode == DImode |
| 22432 | && GET_MODE (XEXP (x, 0)) == SImode) |
| 22433 | *total = 1; |
| 22434 | else if (TARGET_ZERO_EXTEND_WITH_AND) |
| 22435 | *total = cost->add; |
| 22436 | else |
| 22437 | *total = cost->movzx; |
| 22438 | return false; |
| 22439 | |
| 22440 | case SIGN_EXTEND: |
| 22441 | *total = cost->movsx; |
| 22442 | return false; |
| 22443 | |
| 22444 | case ASHIFT: |
| 22445 | if (SCALAR_INT_MODE_P (mode) |
| 22446 | && GET_MODE_SIZE (mode) < UNITS_PER_WORD |
| 22447 | && CONST_INT_P (XEXP (x, 1))) |
| 22448 | { |
| 22449 | HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
| 22450 | if (value == 1) |
| 22451 | { |
| 22452 | *total = cost->add; |
| 22453 | return false; |
| 22454 | } |
| 22455 | if ((value == 2 || value == 3) |
| 22456 | && cost->lea <= cost->shift_const) |
| 22457 | { |
| 22458 | *total = cost->lea; |
| 22459 | return false; |
| 22460 | } |
| 22461 | } |
| 22462 | /* FALLTHRU */ |
| 22463 | |
| 22464 | case ROTATE: |
| 22465 | case ASHIFTRT: |
| 22466 | case LSHIFTRT: |
| 22467 | case ROTATERT: |
| 22468 | bool skip_op0, skip_op1; |
| 22469 | *total = ix86_shift_rotate_cost (cost, code, mode, |
| 22470 | CONSTANT_P (XEXP (x, 1)), |
| 22471 | CONST_INT_P (XEXP (x, 1)) |
| 22472 | ? INTVAL (XEXP (x, 1)) : -1, |
| 22473 | GET_CODE (XEXP (x, 1)) == AND, |
| 22474 | SUBREG_P (XEXP (x, 1)) |
| 22475 | && GET_CODE (XEXP (XEXP (x, 1), |
| 22476 | 0)) == AND, |
| 22477 | skip_op0: &skip_op0, skip_op1: &skip_op1); |
| 22478 | if (skip_op0 || skip_op1) |
| 22479 | { |
| 22480 | if (!skip_op0) |
| 22481 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
| 22482 | if (!skip_op1) |
| 22483 | *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed); |
| 22484 | return true; |
| 22485 | } |
| 22486 | return false; |
| 22487 | |
| 22488 | case FMA: |
| 22489 | { |
| 22490 | rtx sub; |
| 22491 | |
| 22492 | gcc_assert (FLOAT_MODE_P (mode)); |
| 22493 | gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); |
| 22494 | |
| 22495 | *total = ix86_vec_cost (mode, |
| 22496 | GET_MODE_INNER (mode) == SFmode |
| 22497 | ? cost->fmass : cost->fmasd); |
| 22498 | *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); |
| 22499 | |
| 22500 | /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */ |
| 22501 | sub = XEXP (x, 0); |
| 22502 | if (GET_CODE (sub) == NEG) |
| 22503 | sub = XEXP (sub, 0); |
| 22504 | *total += rtx_cost (sub, mode, FMA, 0, speed); |
| 22505 | |
| 22506 | sub = XEXP (x, 2); |
| 22507 | if (GET_CODE (sub) == NEG) |
| 22508 | sub = XEXP (sub, 0); |
| 22509 | *total += rtx_cost (sub, mode, FMA, 2, speed); |
| 22510 | return true; |
| 22511 | } |
| 22512 | |
| 22513 | case MULT: |
| 22514 | if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode)) |
| 22515 | { |
| 22516 | rtx op0 = XEXP (x, 0); |
| 22517 | rtx op1 = XEXP (x, 1); |
| 22518 | int nbits; |
| 22519 | if (CONST_INT_P (XEXP (x, 1))) |
| 22520 | { |
| 22521 | unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
| 22522 | for (nbits = 0; value != 0; value &= value - 1) |
| 22523 | nbits++; |
| 22524 | } |
| 22525 | else |
| 22526 | /* This is arbitrary. */ |
| 22527 | nbits = 7; |
| 22528 | |
| 22529 | /* Compute costs correctly for widening multiplication. */ |
| 22530 | if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) |
| 22531 | && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 |
| 22532 | == GET_MODE_SIZE (mode)) |
| 22533 | { |
| 22534 | int is_mulwiden = 0; |
| 22535 | machine_mode inner_mode = GET_MODE (op0); |
| 22536 | |
| 22537 | if (GET_CODE (op0) == GET_CODE (op1)) |
| 22538 | is_mulwiden = 1, op1 = XEXP (op1, 0); |
| 22539 | else if (CONST_INT_P (op1)) |
| 22540 | { |
| 22541 | if (GET_CODE (op0) == SIGN_EXTEND) |
| 22542 | is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) |
| 22543 | == INTVAL (op1); |
| 22544 | else |
| 22545 | is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); |
| 22546 | } |
| 22547 | |
| 22548 | if (is_mulwiden) |
| 22549 | op0 = XEXP (op0, 0), mode = GET_MODE (op0); |
| 22550 | } |
| 22551 | |
| 22552 | int mult_init; |
| 22553 | // Double word multiplication requires 3 mults and 2 adds. |
| 22554 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 22555 | { |
| 22556 | mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)] |
| 22557 | + 2 * cost->add; |
| 22558 | nbits *= 3; |
| 22559 | } |
| 22560 | else mult_init = cost->mult_init[MODE_INDEX (mode)]; |
| 22561 | |
| 22562 | *total = (mult_init |
| 22563 | + nbits * cost->mult_bit |
| 22564 | + rtx_cost (op0, mode, outer_code, opno, speed) |
| 22565 | + rtx_cost (op1, mode, outer_code, opno, speed)); |
| 22566 | |
| 22567 | return true; |
| 22568 | } |
| 22569 | *total = ix86_multiplication_cost (cost, mode); |
| 22570 | return false; |
| 22571 | |
| 22572 | case DIV: |
| 22573 | case UDIV: |
| 22574 | case MOD: |
| 22575 | case UMOD: |
| 22576 | *total = ix86_division_cost (cost, mode); |
| 22577 | return false; |
| 22578 | |
| 22579 | case PLUS: |
| 22580 | if (GET_MODE_CLASS (mode) == MODE_INT |
| 22581 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
| 22582 | { |
| 22583 | if (GET_CODE (XEXP (x, 0)) == PLUS |
| 22584 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| 22585 | && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) |
| 22586 | && CONSTANT_P (XEXP (x, 1))) |
| 22587 | { |
| 22588 | HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); |
| 22589 | if (val == 2 || val == 4 || val == 8) |
| 22590 | { |
| 22591 | *total = cost->lea; |
| 22592 | *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
| 22593 | outer_code, opno, speed); |
| 22594 | *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, |
| 22595 | outer_code, opno, speed); |
| 22596 | *total += rtx_cost (XEXP (x, 1), mode, |
| 22597 | outer_code, opno, speed); |
| 22598 | return true; |
| 22599 | } |
| 22600 | } |
| 22601 | else if (GET_CODE (XEXP (x, 0)) == MULT |
| 22602 | && CONST_INT_P (XEXP (XEXP (x, 0), 1))) |
| 22603 | { |
| 22604 | HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); |
| 22605 | if (val == 2 || val == 4 || val == 8) |
| 22606 | { |
| 22607 | *total = cost->lea; |
| 22608 | *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| 22609 | outer_code, opno, speed); |
| 22610 | *total += rtx_cost (XEXP (x, 1), mode, |
| 22611 | outer_code, opno, speed); |
| 22612 | return true; |
| 22613 | } |
| 22614 | } |
| 22615 | else if (GET_CODE (XEXP (x, 0)) == PLUS) |
| 22616 | { |
| 22617 | rtx op = XEXP (XEXP (x, 0), 0); |
| 22618 | |
| 22619 | /* Add with carry, ignore the cost of adding a carry flag. */ |
| 22620 | if (ix86_carry_flag_operator (op, mode) |
| 22621 | || ix86_carry_flag_unset_operator (op, mode)) |
| 22622 | *total = cost->add; |
| 22623 | else |
| 22624 | { |
| 22625 | *total = cost->lea; |
| 22626 | *total += rtx_cost (op, mode, |
| 22627 | outer_code, opno, speed); |
| 22628 | } |
| 22629 | |
| 22630 | *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
| 22631 | outer_code, opno, speed); |
| 22632 | *total += rtx_cost (XEXP (x, 1), mode, |
| 22633 | outer_code, opno, speed); |
| 22634 | return true; |
| 22635 | } |
| 22636 | } |
| 22637 | /* FALLTHRU */ |
| 22638 | |
| 22639 | case MINUS: |
| 22640 | /* Subtract with borrow, ignore the cost of subtracting a carry flag. */ |
| 22641 | if (GET_MODE_CLASS (mode) == MODE_INT |
| 22642 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD |
| 22643 | && GET_CODE (XEXP (x, 0)) == MINUS |
| 22644 | && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode) |
| 22645 | || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode))) |
| 22646 | { |
| 22647 | *total = cost->add; |
| 22648 | *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| 22649 | outer_code, opno, speed); |
| 22650 | *total += rtx_cost (XEXP (x, 1), mode, |
| 22651 | outer_code, opno, speed); |
| 22652 | return true; |
| 22653 | } |
| 22654 | |
| 22655 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 22656 | *total = cost->addss; |
| 22657 | else if (X87_FLOAT_MODE_P (mode)) |
| 22658 | *total = cost->fadd; |
| 22659 | else if (FLOAT_MODE_P (mode)) |
| 22660 | *total = ix86_vec_cost (mode, cost: cost->addss); |
| 22661 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| 22662 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
| 22663 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 22664 | *total = cost->add * 2; |
| 22665 | else |
| 22666 | *total = cost->add; |
| 22667 | return false; |
| 22668 | |
| 22669 | case IOR: |
| 22670 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
| 22671 | || SSE_FLOAT_MODE_P (mode)) |
| 22672 | { |
| 22673 | /* (ior (not ...) ...) can be a single insn in AVX512. */ |
| 22674 | if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F |
| 22675 | && (GET_MODE_SIZE (mode) == 64 |
| 22676 | || (TARGET_AVX512VL |
| 22677 | && (GET_MODE_SIZE (mode) == 32 |
| 22678 | || GET_MODE_SIZE (mode) == 16)))) |
| 22679 | { |
| 22680 | rtx right = GET_CODE (XEXP (x, 1)) != NOT |
| 22681 | ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0); |
| 22682 | |
| 22683 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
| 22684 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| 22685 | outer_code, opno, speed) |
| 22686 | + rtx_cost (right, mode, outer_code, opno, speed); |
| 22687 | return true; |
| 22688 | } |
| 22689 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
| 22690 | } |
| 22691 | else if (TARGET_64BIT |
| 22692 | && mode == TImode |
| 22693 | && GET_CODE (XEXP (x, 0)) == ASHIFT |
| 22694 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND |
| 22695 | && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode |
| 22696 | && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| 22697 | && INTVAL (XEXP (XEXP (x, 0), 1)) == 64 |
| 22698 | && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND |
| 22699 | && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode) |
| 22700 | { |
| 22701 | /* *concatditi3 is cheap. */ |
| 22702 | rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0); |
| 22703 | rtx op1 = XEXP (XEXP (x, 1), 0); |
| 22704 | *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode) |
| 22705 | ? COSTS_N_INSNS (1) /* movq. */ |
| 22706 | : set_src_cost (x: op0, DImode, speed_p: speed); |
| 22707 | *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode) |
| 22708 | ? COSTS_N_INSNS (1) /* movq. */ |
| 22709 | : set_src_cost (x: op1, DImode, speed_p: speed); |
| 22710 | return true; |
| 22711 | } |
| 22712 | else if (TARGET_64BIT |
| 22713 | && mode == TImode |
| 22714 | && GET_CODE (XEXP (x, 0)) == AND |
| 22715 | && REG_P (XEXP (XEXP (x, 0), 0)) |
| 22716 | && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1)) |
| 22717 | && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2 |
| 22718 | && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1 |
| 22719 | && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0 |
| 22720 | && GET_CODE (XEXP (x, 1)) == ASHIFT |
| 22721 | && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND |
| 22722 | && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode |
| 22723 | && CONST_INT_P (XEXP (XEXP (x, 1), 1)) |
| 22724 | && INTVAL (XEXP (XEXP (x, 1), 1)) == 64) |
| 22725 | { |
| 22726 | /* *insvti_highpart is cheap. */ |
| 22727 | rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0); |
| 22728 | *total = COSTS_N_INSNS (1) + 1; |
| 22729 | *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode) |
| 22730 | ? COSTS_N_INSNS (1) /* movq. */ |
| 22731 | : set_src_cost (x: op, DImode, speed_p: speed); |
| 22732 | return true; |
| 22733 | } |
| 22734 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 22735 | *total = cost->add * 2; |
| 22736 | else |
| 22737 | *total = cost->add; |
| 22738 | return false; |
| 22739 | |
| 22740 | case XOR: |
| 22741 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
| 22742 | || SSE_FLOAT_MODE_P (mode)) |
| 22743 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
| 22744 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 22745 | *total = cost->add * 2; |
| 22746 | else |
| 22747 | *total = cost->add; |
| 22748 | return false; |
| 22749 | |
| 22750 | case AND: |
| 22751 | if (address_no_seg_operand (x, mode)) |
| 22752 | { |
| 22753 | *total = cost->lea; |
| 22754 | return true; |
| 22755 | } |
| 22756 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
| 22757 | || SSE_FLOAT_MODE_P (mode)) |
| 22758 | { |
| 22759 | /* pandn is a single instruction. */ |
| 22760 | if (GET_CODE (XEXP (x, 0)) == NOT) |
| 22761 | { |
| 22762 | rtx right = XEXP (x, 1); |
| 22763 | |
| 22764 | /* (and (not ...) (not ...)) can be a single insn in AVX512. */ |
| 22765 | if (GET_CODE (right) == NOT && TARGET_AVX512F |
| 22766 | && (GET_MODE_SIZE (mode) == 64 |
| 22767 | || (TARGET_AVX512VL |
| 22768 | && (GET_MODE_SIZE (mode) == 32 |
| 22769 | || GET_MODE_SIZE (mode) == 16)))) |
| 22770 | right = XEXP (right, 0); |
| 22771 | |
| 22772 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
| 22773 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| 22774 | outer_code, opno, speed) |
| 22775 | + rtx_cost (right, mode, outer_code, opno, speed); |
| 22776 | return true; |
| 22777 | } |
| 22778 | else if (GET_CODE (XEXP (x, 1)) == NOT) |
| 22779 | { |
| 22780 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
| 22781 | + rtx_cost (XEXP (x, 0), mode, |
| 22782 | outer_code, opno, speed) |
| 22783 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
| 22784 | outer_code, opno, speed); |
| 22785 | return true; |
| 22786 | } |
| 22787 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
| 22788 | } |
| 22789 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 22790 | { |
| 22791 | if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT) |
| 22792 | { |
| 22793 | *total = cost->add * 2 |
| 22794 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| 22795 | outer_code, opno, speed) |
| 22796 | + rtx_cost (XEXP (x, 1), mode, |
| 22797 | outer_code, opno, speed); |
| 22798 | return true; |
| 22799 | } |
| 22800 | else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT) |
| 22801 | { |
| 22802 | *total = cost->add * 2 |
| 22803 | + rtx_cost (XEXP (x, 0), mode, |
| 22804 | outer_code, opno, speed) |
| 22805 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
| 22806 | outer_code, opno, speed); |
| 22807 | return true; |
| 22808 | } |
| 22809 | *total = cost->add * 2; |
| 22810 | } |
| 22811 | else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT) |
| 22812 | { |
| 22813 | *total = cost->add |
| 22814 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| 22815 | outer_code, opno, speed) |
| 22816 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
| 22817 | return true; |
| 22818 | } |
| 22819 | else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT) |
| 22820 | { |
| 22821 | *total = cost->add |
| 22822 | + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
| 22823 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
| 22824 | outer_code, opno, speed); |
| 22825 | return true; |
| 22826 | } |
| 22827 | else |
| 22828 | *total = cost->add; |
| 22829 | return false; |
| 22830 | |
| 22831 | case NOT: |
| 22832 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| 22833 | { |
| 22834 | /* (not (xor ...)) can be a single insn in AVX512. */ |
| 22835 | if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F |
| 22836 | && (GET_MODE_SIZE (mode) == 64 |
| 22837 | || (TARGET_AVX512VL |
| 22838 | && (GET_MODE_SIZE (mode) == 32 |
| 22839 | || GET_MODE_SIZE (mode) == 16)))) |
| 22840 | { |
| 22841 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
| 22842 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| 22843 | outer_code, opno, speed) |
| 22844 | + rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
| 22845 | outer_code, opno, speed); |
| 22846 | return true; |
| 22847 | } |
| 22848 | |
| 22849 | // vnot is pxor -1. |
| 22850 | *total = ix86_vec_cost (mode, cost: cost->sse_op) + 1; |
| 22851 | } |
| 22852 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 22853 | *total = cost->add * 2; |
| 22854 | else |
| 22855 | *total = cost->add; |
| 22856 | return false; |
| 22857 | |
| 22858 | case NEG: |
| 22859 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 22860 | *total = cost->sse_op; |
| 22861 | else if (X87_FLOAT_MODE_P (mode)) |
| 22862 | *total = cost->fchs; |
| 22863 | else if (FLOAT_MODE_P (mode)) |
| 22864 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
| 22865 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| 22866 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
| 22867 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| 22868 | *total = cost->add * 3; |
| 22869 | else |
| 22870 | *total = cost->add; |
| 22871 | return false; |
| 22872 | |
| 22873 | case COMPARE: |
| 22874 | rtx op0, op1; |
| 22875 | op0 = XEXP (x, 0); |
| 22876 | op1 = XEXP (x, 1); |
| 22877 | if (GET_CODE (op0) == ZERO_EXTRACT |
| 22878 | && XEXP (op0, 1) == const1_rtx |
| 22879 | && CONST_INT_P (XEXP (op0, 2)) |
| 22880 | && op1 == const0_rtx) |
| 22881 | { |
| 22882 | /* This kind of construct is implemented using test[bwl]. |
| 22883 | Treat it as if we had an AND. */ |
| 22884 | mode = GET_MODE (XEXP (op0, 0)); |
| 22885 | *total = (cost->add |
| 22886 | + rtx_cost (XEXP (op0, 0), mode, outer_code, |
| 22887 | opno, speed) |
| 22888 | + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); |
| 22889 | return true; |
| 22890 | } |
| 22891 | |
| 22892 | if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1)) |
| 22893 | { |
| 22894 | /* This is an overflow detection, count it as a normal compare. */ |
| 22895 | *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed); |
| 22896 | return true; |
| 22897 | } |
| 22898 | |
| 22899 | rtx geu; |
| 22900 | /* Match x |
| 22901 | (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) |
| 22902 | (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */ |
| 22903 | if (mode == CCCmode |
| 22904 | && GET_CODE (op0) == NEG |
| 22905 | && GET_CODE (geu = XEXP (op0, 0)) == GEU |
| 22906 | && REG_P (XEXP (geu, 0)) |
| 22907 | && (GET_MODE (XEXP (geu, 0)) == CCCmode |
| 22908 | || GET_MODE (XEXP (geu, 0)) == CCmode) |
| 22909 | && REGNO (XEXP (geu, 0)) == FLAGS_REG |
| 22910 | && XEXP (geu, 1) == const0_rtx |
| 22911 | && GET_CODE (op1) == LTU |
| 22912 | && REG_P (XEXP (op1, 0)) |
| 22913 | && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0)) |
| 22914 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
| 22915 | && XEXP (op1, 1) == const0_rtx) |
| 22916 | { |
| 22917 | /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */ |
| 22918 | *total = 0; |
| 22919 | return true; |
| 22920 | } |
| 22921 | /* Match x |
| 22922 | (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) |
| 22923 | (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */ |
| 22924 | if (mode == CCCmode |
| 22925 | && GET_CODE (op0) == NEG |
| 22926 | && GET_CODE (XEXP (op0, 0)) == LTU |
| 22927 | && REG_P (XEXP (XEXP (op0, 0), 0)) |
| 22928 | && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode |
| 22929 | && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG |
| 22930 | && XEXP (XEXP (op0, 0), 1) == const0_rtx |
| 22931 | && GET_CODE (op1) == GEU |
| 22932 | && REG_P (XEXP (op1, 0)) |
| 22933 | && GET_MODE (XEXP (op1, 0)) == CCCmode |
| 22934 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
| 22935 | && XEXP (op1, 1) == const0_rtx) |
| 22936 | { |
| 22937 | /* This is *x86_cmc. */ |
| 22938 | if (!speed) |
| 22939 | *total = COSTS_N_BYTES (1); |
| 22940 | else if (TARGET_SLOW_STC) |
| 22941 | *total = COSTS_N_INSNS (2); |
| 22942 | else |
| 22943 | *total = COSTS_N_INSNS (1); |
| 22944 | return true; |
| 22945 | } |
| 22946 | |
| 22947 | if (SCALAR_INT_MODE_P (GET_MODE (op0)) |
| 22948 | && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) |
| 22949 | { |
| 22950 | if (op1 == const0_rtx) |
| 22951 | *total = cost->add |
| 22952 | + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed); |
| 22953 | else |
| 22954 | *total = 3*cost->add |
| 22955 | + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed) |
| 22956 | + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed); |
| 22957 | return true; |
| 22958 | } |
| 22959 | |
| 22960 | /* The embedded comparison operand is completely free. */ |
| 22961 | if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx) |
| 22962 | *total = 0; |
| 22963 | |
| 22964 | return false; |
| 22965 | |
| 22966 | case FLOAT_EXTEND: |
| 22967 | /* x87 represents all values extended to 80bit. */ |
| 22968 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 22969 | *total = 0; |
| 22970 | else |
| 22971 | *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); |
| 22972 | return false; |
| 22973 | |
| 22974 | case FLOAT_TRUNCATE: |
| 22975 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 22976 | *total = cost->fadd; |
| 22977 | else |
| 22978 | *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); |
| 22979 | return false; |
| 22980 | case FLOAT: |
| 22981 | case UNSIGNED_FLOAT: |
| 22982 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 22983 | /* TODO: We do not have cost tables for x87. */ |
| 22984 | *total = cost->fadd; |
| 22985 | else if (VECTOR_MODE_P (mode)) |
| 22986 | *total = ix86_vec_cost (mode, cost: cost->cvtpi2ps); |
| 22987 | else |
| 22988 | *total = cost->cvtsi2ss; |
| 22989 | return false; |
| 22990 | |
| 22991 | case FIX: |
| 22992 | case UNSIGNED_FIX: |
| 22993 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 22994 | /* TODO: We do not have cost tables for x87. */ |
| 22995 | *total = cost->fadd; |
| 22996 | else if (VECTOR_MODE_P (mode)) |
| 22997 | *total = ix86_vec_cost (mode, cost: cost->cvtps2pi); |
| 22998 | else |
| 22999 | *total = cost->cvtss2si; |
| 23000 | return false; |
| 23001 | |
| 23002 | case ABS: |
| 23003 | /* SSE requires memory load for the constant operand. It may make |
| 23004 | sense to account for this. Of course the constant operand may or |
| 23005 | may not be reused. */ |
| 23006 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 23007 | *total = cost->sse_op; |
| 23008 | else if (X87_FLOAT_MODE_P (mode)) |
| 23009 | *total = cost->fabs; |
| 23010 | else if (FLOAT_MODE_P (mode)) |
| 23011 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
| 23012 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| 23013 | *total = cost->sse_op; |
| 23014 | return false; |
| 23015 | |
| 23016 | case SQRT: |
| 23017 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
| 23018 | *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; |
| 23019 | else if (X87_FLOAT_MODE_P (mode)) |
| 23020 | *total = cost->fsqrt; |
| 23021 | else if (FLOAT_MODE_P (mode)) |
| 23022 | *total = ix86_vec_cost (mode, |
| 23023 | cost: mode == SFmode ? cost->sqrtss : cost->sqrtsd); |
| 23024 | return false; |
| 23025 | |
| 23026 | case UNSPEC: |
| 23027 | if (XINT (x, 1) == UNSPEC_TP) |
| 23028 | *total = 0; |
| 23029 | else if (XINT (x, 1) == UNSPEC_VTERNLOG) |
| 23030 | { |
| 23031 | *total = cost->sse_op; |
| 23032 | *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed); |
| 23033 | *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed); |
| 23034 | *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed); |
| 23035 | return true; |
| 23036 | } |
| 23037 | else if (XINT (x, 1) == UNSPEC_PTEST) |
| 23038 | { |
| 23039 | *total = cost->sse_op; |
| 23040 | rtx test_op0 = XVECEXP (x, 0, 0); |
| 23041 | if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1))) |
| 23042 | return false; |
| 23043 | if (GET_CODE (test_op0) == AND) |
| 23044 | { |
| 23045 | rtx and_op0 = XEXP (test_op0, 0); |
| 23046 | if (GET_CODE (and_op0) == NOT) |
| 23047 | and_op0 = XEXP (and_op0, 0); |
| 23048 | *total += rtx_cost (and_op0, GET_MODE (and_op0), |
| 23049 | AND, 0, speed) |
| 23050 | + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0), |
| 23051 | AND, 1, speed); |
| 23052 | } |
| 23053 | else |
| 23054 | *total = rtx_cost (test_op0, GET_MODE (test_op0), |
| 23055 | UNSPEC, 0, speed); |
| 23056 | return true; |
| 23057 | } |
| 23058 | return false; |
| 23059 | |
| 23060 | case VEC_CONCAT: |
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.
	 ??? We should still recurse when computing cost.  */
| 23065 | *total = cost->sse_op; |
| 23066 | return true; |
| 23067 | |
| 23068 | case VEC_SELECT: |
      /* Special case extracting lower part from the vector.
	 This by itself needs no code and most of SSE/AVX instructions have
	 packed and single forms where the single form may be represented
	 by such VEC_SELECT.

	 Use cost 1 (despite the fact that functionally equivalent SUBREG has
	 cost 0).  Making VEC_SELECT completely free, for example, instructs CSE
	 to forward propagate VEC_SELECT into

	    (set (reg eax) (reg src))

	 which then prevents fwprop and combining.  See i.e.
	 gcc.target/i386/pr91103-1.c.

	 ??? rtvec_series_p test should be, for valid patterns, equivalent to
	 vec_series_lowpart_p but is not, since the latter calls
	 can_change_mode_class on ALL_REGS and this returns false since x87 does
	 not support subregs at all.  */
| 23087 | if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0)) |
| 23088 | *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)), |
| 23089 | outer_code, opno, speed) + 1; |
| 23090 | else |
	/* ??? We should still recurse when computing cost.  */
| 23092 | *total = cost->sse_op; |
| 23093 | return true; |
| 23094 | |
| 23095 | case VEC_DUPLICATE: |
| 23096 | *total = rtx_cost (XEXP (x, 0), |
| 23097 | GET_MODE (XEXP (x, 0)), |
| 23098 | VEC_DUPLICATE, 0, speed); |
| 23099 | /* It's broadcast instruction, not embedded broadcasting. */ |
| 23100 | if (outer_code == SET) |
| 23101 | *total += cost->sse_op; |
| 23102 | |
| 23103 | return true; |
| 23104 | |
| 23105 | case VEC_MERGE: |
| 23106 | mask = XEXP (x, 2); |
| 23107 | /* Scalar versions of SSE instructions may be represented as: |
| 23108 | |
| 23109 | (vec_merge (vec_duplicate (operation ....)) |
| 23110 | (register or memory) |
| 23111 | (const_int 1)) |
| 23112 | |
| 23113 | In this case vec_merge and vec_duplicate is for free. |
| 23114 | Just recurse into operation and second operand. */ |
| 23115 | if (mask == const1_rtx |
| 23116 | && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) |
| 23117 | { |
| 23118 | *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| 23119 | outer_code, opno, speed) |
| 23120 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
| 23121 | return true; |
| 23122 | } |
| 23123 | /* This is masked instruction, assume the same cost, |
| 23124 | as nonmasked variant. */ |
| 23125 | else if (TARGET_AVX512F |
| 23126 | && (register_operand (mask, GET_MODE (mask)) |
		   /* Redundant cleanup of high bits for kmask with VL=2/4,
		      i.e. (vec_merge op0, op1, (and op3 15)).  */
| 23129 | || (GET_CODE (mask) == AND |
| 23130 | && register_operand (XEXP (mask, 0), GET_MODE (mask)) |
| 23131 | && CONST_INT_P (XEXP (mask, 1)) |
| 23132 | && ((INTVAL (XEXP (mask, 1)) == 3 |
| 23133 | && GET_MODE_NUNITS (mode) == 2) |
| 23134 | || (INTVAL (XEXP (mask, 1)) == 15 |
| 23135 | && GET_MODE_NUNITS (mode) == 4))))) |
| 23136 | { |
| 23137 | *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
| 23138 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
| 23139 | return true; |
| 23140 | } |
| 23141 | /* Combination of the two above: |
| 23142 | |
| 23143 | (vec_merge (vec_merge (vec_duplicate (operation ...)) |
| 23144 | (register or memory) |
| 23145 | (reg:QI mask)) |
| 23146 | (register or memory) |
| 23147 | (const_int 1)) |
| 23148 | |
| 23149 | i.e. avx512fp16_vcvtss2sh_mask. */ |
| 23150 | else if (TARGET_AVX512F |
| 23151 | && mask == const1_rtx |
| 23152 | && GET_CODE (XEXP (x, 0)) == VEC_MERGE |
| 23153 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE |
| 23154 | && register_operand (XEXP (XEXP (x, 0), 2), |
| 23155 | GET_MODE (XEXP (XEXP (x, 0), 2)))) |
| 23156 | { |
| 23157 | *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), |
| 23158 | mode, outer_code, opno, speed) |
| 23159 | + rtx_cost (XEXP (XEXP (x, 0), 1), |
| 23160 | mode, outer_code, opno, speed) |
| 23161 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
| 23162 | return true; |
| 23163 | } |
| 23164 | /* vcmp. */ |
| 23165 | else if (unspec_pcmp_p (x: mask) |
| 23166 | || (GET_CODE (mask) == NOT |
| 23167 | && unspec_pcmp_p (XEXP (mask, 0)))) |
| 23168 | { |
| 23169 | rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask; |
| 23170 | rtx unsop0 = XVECEXP (uns, 0, 0); |
| 23171 | /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0) |
| 23172 | cost the same as register. |
| 23173 | This is used by avx_cmp<mode>3_ltint_not. */ |
| 23174 | if (SUBREG_P (unsop0)) |
| 23175 | unsop0 = XEXP (unsop0, 0); |
| 23176 | if (GET_CODE (unsop0) == NOT) |
| 23177 | unsop0 = XEXP (unsop0, 0); |
| 23178 | *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
| 23179 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) |
| 23180 | + rtx_cost (unsop0, mode, UNSPEC, opno, speed) |
| 23181 | + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed) |
| 23182 | + cost->sse_op; |
| 23183 | return true; |
| 23184 | } |
| 23185 | else |
| 23186 | *total = cost->sse_op; |
| 23187 | return false; |
| 23188 | |
| 23189 | case MEM: |
| 23190 | /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast. |
| 23191 | or variants in ix86_vector_duplicate_simode_const. */ |
| 23192 | |
| 23193 | if (GET_MODE_SIZE (mode) >= 16 |
| 23194 | && VECTOR_MODE_P (mode) |
| 23195 | && SYMBOL_REF_P (XEXP (x, 0)) |
| 23196 | && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) |
| 23197 | && ix86_broadcast_from_constant (mode, x)) |
| 23198 | { |
| 23199 | *total = COSTS_N_INSNS (2) + speed; |
| 23200 | return true; |
| 23201 | } |
| 23202 | |
| 23203 | /* An insn that accesses memory is slightly more expensive |
| 23204 | than one that does not. */ |
| 23205 | if (speed) |
| 23206 | { |
| 23207 | *total += 1; |
| 23208 | rtx addr = XEXP (x, 0); |
| 23209 | /* For MEM, rtx_cost iterates each subrtx, and adds up the costs, |
| 23210 | so for MEM (reg) and MEM (reg + 4), the former costs 5, |
| 23211 | the latter costs 9, it is not accurate for x86. Ideally |
| 23212 | address_cost should be used, but it reduce cost too much. |
| 23213 | So current solution is make constant disp as cheap as possible. */ |
| 23214 | if (GET_CODE (addr) == PLUS |
| 23215 | && x86_64_immediate_operand (XEXP (addr, 1), Pmode) |
| 23216 | /* Only handle (reg + disp) since other forms of addr are mostly LEA, |
| 23217 | there's no additional cost for the plus of disp. */ |
| 23218 | && register_operand (XEXP (addr, 0), Pmode)) |
| 23219 | { |
| 23220 | *total += 1; |
| 23221 | *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed); |
| 23222 | return true; |
| 23223 | } |
| 23224 | } |
| 23225 | |
| 23226 | return false; |
| 23227 | |
| 23228 | case ZERO_EXTRACT: |
| 23229 | if (XEXP (x, 1) == const1_rtx |
| 23230 | && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND |
| 23231 | && GET_MODE (XEXP (x, 2)) == SImode |
| 23232 | && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode) |
| 23233 | { |
| 23234 | /* Ignore cost of zero extension and masking of last argument. */ |
| 23235 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
| 23236 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
| 23237 | *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed); |
| 23238 | return true; |
| 23239 | } |
| 23240 | return false; |
| 23241 | |
| 23242 | case IF_THEN_ELSE: |
| 23243 | if (TARGET_XOP |
| 23244 | && VECTOR_MODE_P (mode) |
| 23245 | && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32)) |
| 23246 | { |
| 23247 | /* vpcmov. */ |
| 23248 | *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6); |
| 23249 | if (!REG_P (XEXP (x, 0))) |
| 23250 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
| 23251 | if (!REG_P (XEXP (x, 1))) |
| 23252 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
| 23253 | if (!REG_P (XEXP (x, 2))) |
| 23254 | *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); |
| 23255 | return true; |
| 23256 | } |
| 23257 | else if (TARGET_CMOVE |
| 23258 | && SCALAR_INT_MODE_P (mode) |
| 23259 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
| 23260 | { |
| 23261 | /* cmov. */ |
| 23262 | *total = COSTS_N_INSNS (1); |
| 23263 | if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0))) |
| 23264 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
| 23265 | if (!REG_P (XEXP (x, 1))) |
| 23266 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
| 23267 | if (!REG_P (XEXP (x, 2))) |
| 23268 | *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); |
| 23269 | return true; |
| 23270 | } |
| 23271 | return false; |
| 23272 | |
| 23273 | default: |
| 23274 | return false; |
| 23275 | } |
| 23276 | } |
| 23277 | |
| 23278 | #if TARGET_MACHO |
| 23279 | |
/* Counter used by machopic_output_stub to give each emitted Mach-O
   stub a unique local label number ("L<n>$lz", "LPC$<n>").  */
static int current_machopic_label_num;
| 23281 | |
/* Given a symbol name SYMB and its associated lazy-pointer stub name
   STUB, write out the definition of the stub: the stub body itself,
   the stub-binding-helper trampoline and the lazy symbol pointer.
   Only reached for 32-bit Darwin (Mach-O).  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  /* Per-stub sequence number used to build unique local labels.  */
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  /* Name of the stub-binding helper, derived from the stub name.  */
  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  /* Assembler-level name of the symbol itself.  */
  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  /* Label of the lazy pointer slot the stub jumps through.  */
  sprintf (lazy_ptr_name, "L%d$lz" , label);

  /* Pick the section matching the stub flavor: AT&T-style (stub3),
     pure PIC (stub2), or plain non-PIC.  */
  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n" , stub);
  fprintf (file, "\t.indirect_symbol %s\n" , symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      /* AT&T-style stub body is five hlt placeholder bytes;
	 NOTE(review): presumably patched in place at bind time (it is
	 described below as "self-modifying") — confirm against the
	 Darwin linker/dyld documentation.  */
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n" );
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n" ,
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n" );
    }
  else
    /* Non-PIC: jump indirectly through the lazy pointer.  */
    fprintf (file, "\tjmp\t*%s\n" , lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  /* Stub-binding helper: pushes the lazy-pointer address and enters
     dyld's binding routine.  */
  fprintf (file, "%s:\n" , binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n" , lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n" );
    }
  else
    fprintf (file, "\tpushl\t$%s\n" , lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n" , file);

  /* N.B.  Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  /* The lazy pointer slot: initially holds the binder's address (the
     ASM_LONG below), so the first call through the stub triggers
     binding.  */
  fprintf (file, "%s:\n" , lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n" , symbol_name);
  fprintf (file, ASM_LONG "%s\n" , binder_name);
}
| 23369 | #endif /* TARGET_MACHO */ |
| 23370 | |
| 23371 | /* Order the registers for register allocator. */ |
| 23372 | |
| 23373 | void |
| 23374 | x86_order_regs_for_local_alloc (void) |
| 23375 | { |
| 23376 | int pos = 0; |
| 23377 | int i; |
| 23378 | |
| 23379 | /* First allocate the local general purpose registers. */ |
| 23380 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| 23381 | if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (regno: i)) |
| 23382 | reg_alloc_order [pos++] = i; |
| 23383 | |
| 23384 | /* Global general purpose registers. */ |
| 23385 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| 23386 | if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (regno: i)) |
| 23387 | reg_alloc_order [pos++] = i; |
| 23388 | |
| 23389 | /* x87 registers come first in case we are doing FP math |
| 23390 | using them. */ |
| 23391 | if (!TARGET_SSE_MATH) |
| 23392 | for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
| 23393 | reg_alloc_order [pos++] = i; |
| 23394 | |
| 23395 | /* SSE registers. */ |
| 23396 | for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) |
| 23397 | reg_alloc_order [pos++] = i; |
| 23398 | for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
| 23399 | reg_alloc_order [pos++] = i; |
| 23400 | |
| 23401 | /* Extended REX SSE registers. */ |
| 23402 | for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
| 23403 | reg_alloc_order [pos++] = i; |
| 23404 | |
| 23405 | /* Mask register. */ |
| 23406 | for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) |
| 23407 | reg_alloc_order [pos++] = i; |
| 23408 | |
| 23409 | /* x87 registers. */ |
| 23410 | if (TARGET_SSE_MATH) |
| 23411 | for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
| 23412 | reg_alloc_order [pos++] = i; |
| 23413 | |
| 23414 | for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) |
| 23415 | reg_alloc_order [pos++] = i; |
| 23416 | |
| 23417 | /* Initialize the rest of array as we do not allocate some registers |
| 23418 | at all. */ |
| 23419 | while (pos < FIRST_PSEUDO_REGISTER) |
| 23420 | reg_alloc_order [pos++] = 0; |
| 23421 | } |
| 23422 | |
| 23423 | static bool |
| 23424 | ix86_ms_bitfield_layout_p (const_tree record_type) |
| 23425 | { |
| 23426 | return ((TARGET_MS_BITFIELD_LAYOUT |
| 23427 | && !lookup_attribute (attr_name: "gcc_struct" , TYPE_ATTRIBUTES (record_type))) |
| 23428 | || lookup_attribute (attr_name: "ms_struct" , TYPE_ATTRIBUTES (record_type))); |
| 23429 | } |
| 23430 | |
| 23431 | /* Returns an expression indicating where the this parameter is |
| 23432 | located on entry to the FUNCTION. */ |
| 23433 | |
| 23434 | static rtx |
| 23435 | x86_this_parameter (tree function) |
| 23436 | { |
| 23437 | tree type = TREE_TYPE (function); |
| 23438 | bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; |
| 23439 | int nregs; |
| 23440 | |
| 23441 | if (TARGET_64BIT) |
| 23442 | { |
| 23443 | const int *parm_regs; |
| 23444 | |
| 23445 | if (lookup_attribute (attr_name: "preserve_none" , TYPE_ATTRIBUTES (type))) |
| 23446 | parm_regs = x86_64_preserve_none_int_parameter_registers; |
| 23447 | else if (ix86_function_type_abi (fntype: type) == MS_ABI) |
| 23448 | parm_regs = x86_64_ms_abi_int_parameter_registers; |
| 23449 | else |
| 23450 | parm_regs = x86_64_int_parameter_registers; |
| 23451 | return gen_rtx_REG (Pmode, parm_regs[aggr]); |
| 23452 | } |
| 23453 | |
| 23454 | nregs = ix86_function_regparm (type, decl: function); |
| 23455 | |
| 23456 | if (nregs > 0 && !stdarg_p (type)) |
| 23457 | { |
| 23458 | int regno; |
| 23459 | unsigned int ccvt = ix86_get_callcvt (type); |
| 23460 | |
| 23461 | if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
| 23462 | regno = aggr ? DX_REG : CX_REG; |
| 23463 | else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
| 23464 | { |
| 23465 | regno = CX_REG; |
| 23466 | if (aggr) |
| 23467 | return gen_rtx_MEM (SImode, |
| 23468 | plus_constant (Pmode, stack_pointer_rtx, 4)); |
| 23469 | } |
| 23470 | else |
| 23471 | { |
| 23472 | regno = AX_REG; |
| 23473 | if (aggr) |
| 23474 | { |
| 23475 | regno = DX_REG; |
| 23476 | if (nregs == 1) |
| 23477 | return gen_rtx_MEM (SImode, |
| 23478 | plus_constant (Pmode, |
| 23479 | stack_pointer_rtx, 4)); |
| 23480 | } |
| 23481 | } |
| 23482 | return gen_rtx_REG (SImode, regno); |
| 23483 | } |
| 23484 | |
| 23485 | return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, |
| 23486 | aggr ? 8 : 4)); |
| 23487 | } |
| 23488 | |
| 23489 | /* Determine whether x86_output_mi_thunk can succeed. */ |
| 23490 | |
| 23491 | static bool |
| 23492 | x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, |
| 23493 | const_tree function) |
| 23494 | { |
| 23495 | /* 64-bit can handle anything. */ |
| 23496 | if (TARGET_64BIT) |
| 23497 | return true; |
| 23498 | |
| 23499 | /* For 32-bit, everything's fine if we have one free register. */ |
| 23500 | if (ix86_function_regparm (TREE_TYPE (function), decl: function) < 3) |
| 23501 | return true; |
| 23502 | |
| 23503 | /* Need a free register for vcall_offset. */ |
| 23504 | if (vcall_offset) |
| 23505 | return false; |
| 23506 | |
| 23507 | /* Need a free register for GOT references. */ |
| 23508 | if (flag_pic && !targetm.binds_local_p (function)) |
| 23509 | return false; |
| 23510 | |
| 23511 | /* Otherwise ok. */ |
| 23512 | return true; |
| 23513 | } |
| 23514 | |
/* Output the assembler code for a thunk function.  THUNK_FNDECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  The generated
   insns are assembled immediately via final_start_function/final,
   bypassing the normal pass pipeline.  */

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;
  int saved_flag_force_indirect_call = flag_force_indirect_call;

  /* Pick a scratch register that is not used for argument passing
     under the target function's calling convention.  */
  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;

      /* The 32-bit PIC paths below build the call through the GOT
	 themselves; disable -mforce-indirect-call while doing so.  */
      if (flag_pic)
	flag_force_indirect_call = 0;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If CET branch protection is enabled, insert an ENDBR instruction
     at the thunk's entry point.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  /* A delta that is not a valid immediate must be loaded
	     into the scratch register first.  */
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (code: PLUS, Pmode, dst: delta_dst, src: delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      /* Load the vtable pointer (*this) into the scratch register.  */
      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, addr: vcall_addr, strict: true))
	{
	  /* The offset does not fit in a displacement; form the
	     address with an explicit register-register add.  */
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (code: PLUS, Pmode, dst: this_reg, src: vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  /* Compute the address of the target function to tail-call.  */
  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  /* Non-local PIC target: load the address from the GOT.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  /* 32-bit non-local PIC: materialize the GOT base in %ecx
	     and load the target address from the GOT.  */
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (dest: tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  fnaddr = XEXP (DECL_RTL (function), 0);
	  tmp = gen_rtx_MEM (QImode, fnaddr);
	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	{
	  // CM_LARGE_PIC always uses pseudo PIC register which is
	  // uninitialized.  Since FUNCTION is local and calling it
	  // doesn't go through PLT, we use scratch register %r11 as
	  // PIC register and initialize it here.
	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
	  ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (orig: fnaddr,
					   reg: gen_rtx_REG (Pmode, tmp_regno));
	}

      /* If the target address is not directly usable as a sibcall
	 operand, move it into the scratch register first.  */
      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  flag_force_indirect_call = saved_flag_force_indirect_call;
}
| 23716 | |
| 23717 | static void |
| 23718 | x86_file_start (void) |
| 23719 | { |
| 23720 | default_file_start (); |
| 23721 | if (TARGET_16BIT) |
| 23722 | fputs (s: "\t.code16gcc\n" , stream: asm_out_file); |
| 23723 | #if TARGET_MACHO |
| 23724 | darwin_file_start (); |
| 23725 | #endif |
| 23726 | if (X86_FILE_START_VERSION_DIRECTIVE) |
| 23727 | fputs (s: "\t.version\t\"01.01\"\n" , stream: asm_out_file); |
| 23728 | if (X86_FILE_START_FLTUSED) |
| 23729 | fputs (s: "\t.global\t__fltused\n" , stream: asm_out_file); |
| 23730 | if (ix86_asm_dialect == ASM_INTEL) |
| 23731 | fputs (s: "\t.intel_syntax noprefix\n" , stream: asm_out_file); |
| 23732 | } |
| 23733 | |
| 23734 | int |
| 23735 | x86_field_alignment (tree type, int computed) |
| 23736 | { |
| 23737 | machine_mode mode; |
| 23738 | |
| 23739 | if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) |
| 23740 | return computed; |
| 23741 | if (TARGET_IAMCU) |
| 23742 | return iamcu_alignment (type, align: computed); |
| 23743 | type = strip_array_types (type); |
| 23744 | mode = TYPE_MODE (type); |
| 23745 | if (mode == DFmode || mode == DCmode |
| 23746 | || GET_MODE_CLASS (mode) == MODE_INT |
| 23747 | || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) |
| 23748 | { |
| 23749 | if (TYPE_ATOMIC (type) && computed > 32) |
| 23750 | { |
| 23751 | static bool warned; |
| 23752 | |
| 23753 | if (!warned && warn_psabi) |
| 23754 | { |
| 23755 | const char *url |
| 23756 | = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic" ; |
| 23757 | |
| 23758 | warned = true; |
| 23759 | inform (input_location, "the alignment of %<_Atomic %T%> " |
| 23760 | "fields changed in %{GCC 11.1%}" , |
| 23761 | TYPE_MAIN_VARIANT (type), url); |
| 23762 | } |
| 23763 | } |
| 23764 | else |
| 23765 | return MIN (32, computed); |
| 23766 | } |
| 23767 | return computed; |
| 23768 | } |
| 23769 | |
| 23770 | /* Print call to TARGET to FILE. */ |
| 23771 | |
| 23772 | static void |
| 23773 | x86_print_call_or_nop (FILE *file, const char *target, |
| 23774 | const char *label) |
| 23775 | { |
| 23776 | if (flag_nop_mcount || !strcmp (s1: target, s2: "nop" )) |
| 23777 | { |
| 23778 | if (TARGET_16BIT) |
| 23779 | /* 3 byte no-op: lea 0(%si), %si */ |
| 23780 | fprintf (stream: file, format: "%s" ASM_BYTE "0x8d, 0x74, 0x00\n" , label); |
| 23781 | else |
| 23782 | /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ |
| 23783 | fprintf (stream: file, format: "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n" , |
| 23784 | label); |
| 23785 | } |
| 23786 | else if (!TARGET_PECOFF && flag_pic) |
| 23787 | { |
| 23788 | gcc_assert (flag_plt); |
| 23789 | |
| 23790 | fprintf (stream: file, format: "%s\tcall\t%s@PLT\n" , label, target); |
| 23791 | } |
| 23792 | else |
| 23793 | fprintf (stream: file, format: "%s\tcall\t%s\n" , label, target); |
| 23794 | } |
| 23795 | |
| 23796 | static bool |
| 23797 | current_fentry_name (const char **name) |
| 23798 | { |
| 23799 | tree attr = lookup_attribute (attr_name: "fentry_name" , |
| 23800 | DECL_ATTRIBUTES (current_function_decl)); |
| 23801 | if (!attr) |
| 23802 | return false; |
| 23803 | *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); |
| 23804 | return true; |
| 23805 | } |
| 23806 | |
| 23807 | static bool |
| 23808 | current_fentry_section (const char **name) |
| 23809 | { |
| 23810 | tree attr = lookup_attribute (attr_name: "fentry_section" , |
| 23811 | DECL_ATTRIBUTES (current_function_decl)); |
| 23812 | if (!attr) |
| 23813 | return false; |
| 23814 | *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); |
| 23815 | return true; |
| 23816 | } |
| 23817 | |
/* Return a caller-saved register which isn't live or a callee-saved
   register which has been saved on stack in the prologue at entry for
   profile.  R11_OK says whether %r11 may be chosen; when profile
   counters are in use (NO_PROFILE_COUNTERS undefined) %r11 is never
   available because the caller loads the counter address into it.
   Issues sorry() and falls back to %r10 when nothing is free.  */

static int
x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
{
  /* Use %r10 if the profiler is emitted before the prologue or it isn't
     used by DRAP.  */
  if (ix86_profile_before_prologue ()
      || !crtl->drap_reg
      || REGNO (crtl->drap_reg) != R10_REG)
    return R10_REG;

  /* The profiler is emitted after the prologue.  If there is a
     caller-saved register which isn't live or a callee-saved
     register saved on stack in the prologue, use it.  */

  /* Registers live on entry must not be clobbered.  */
  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  int i;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i)
	&& i != R10_REG
#ifdef NO_PROFILE_COUNTERS
	&& (r11_ok || i != R11_REG)
#else
	&& i != R11_REG
#endif
	&& TEST_HARD_REG_BIT (accessible_reg_set, bit: i)
	&& (ix86_save_reg (regno: i, maybe_eh_return: true, ignore_outlined: true)
	    || (call_used_regs[i]
		&& !fixed_regs[i]
		&& !REGNO_REG_SET_P (reg_live, i))))
      return i;

  sorry ("no register available for profiling %<-mcmodel=large%s%>" ,
	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "" );

  /* Unreachable result after sorry(); keep the compiler going.  */
  return R10_REG;
}
| 23859 | |
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  Emits any material queued for the
   function entrance (ENDBR, patchable area) first, then the call to
   the mcount routine appropriate for the code model, and finally a
   call-site record in the fentry section when requested.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  /* Material queued at the function entrance must be emitted before
     the profiler call.  */
  if (cfun->machine->insn_queued_at_entrance)
    {
      if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
	fprintf (stream: file, format: "\t%s\n" , TARGET_64BIT ? "endbr64" : "endbr32" );
      unsigned int patch_area_size
	= crtl->patch_area_size - crtl->patch_area_entry;
      if (patch_area_size)
	ix86_output_patchable_area (patch_area_size,
				    crtl->patch_area_entry == 0);
    }

  const char *mcount_name = MCOUNT_NAME;

  /* -mrecord-mcount or a "fentry_section" attribute needs a local
     "1:" label on the call so its address can be recorded below.  */
  bool fentry_section_p
    = (flag_record_mcount
       || lookup_attribute (attr_name: "fentry_section" ,
			    DECL_ATTRIBUTES (current_function_decl)));

  const char *label = fentry_section_p ? "1:" : "" ;

  /* Resolve the profiler routine name: the per-function attribute
     wins, then the -mfentry-name option, then the -mfentry default.  */
  if (current_fentry_name (name: &mcount_name))
    ;
  else if (fentry_name)
    mcount_name = fentry_name;
  else if (flag_fentry)
    mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      /* Pass the per-call-site counter address in %r11.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file, "\tlea\tr11, %sP%d[rip]\n" , LPREFIX, labelno);
      else
	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n" , LPREFIX, labelno);
#endif

      int scratch;
      const char *reg;
      char legacy_reg[4] = { 0 };

      if (!TARGET_PECOFF)
	{
	  switch (ix86_cmodel)
	    {
	    case CM_LARGE:
	      /* Large model: the target may be out of +-2GB range, so
		 call through a scratch register loaded with movabs.  */
	      scratch = x86_64_select_profile_regnum (r11_ok: true);
	      reg = hi_reg_name[scratch];
	      /* NOTE(review): this assumes hi_reg_name spells legacy
		 registers with two letters ("ax" etc.); prefix 'r' to
		 form the 64-bit name.  */
	      if (LEGACY_INT_REGNO_P (scratch))
		{
		  legacy_reg[0] = 'r';
		  legacy_reg[1] = reg[0];
		  legacy_reg[2] = reg[1];
		  reg = legacy_reg;
		}
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		fprintf (stream: file, format: "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
			 "\tcall\t%s\n" , label, reg, mcount_name,
			 reg);
	      else
		fprintf (stream: file, format: "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n" ,
			 label, mcount_name, reg, reg);
	      break;
	    case CM_LARGE_PIC:
#ifdef NO_PROFILE_COUNTERS
	      /* Large PIC model: compute the GOT base from a local
		 label, add the @PLTOFF offset of the profiler routine
		 and call through the scratch register.  %r11 is used
		 as a secondary scratch and is therefore excluded.  */
	      scratch = x86_64_select_profile_regnum (r11_ok: false);
	      reg = hi_reg_name[scratch];
	      if (LEGACY_INT_REGNO_P (scratch))
		{
		  legacy_reg[0] = 'r';
		  legacy_reg[1] = reg[0];
		  legacy_reg[2] = reg[1];
		  reg = legacy_reg;
		}
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		{
		  fprintf (stream: file, format: "1:movabs\tr11, "
			   "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n" );
		  fprintf (stream: file, format: "\tlea\t%s, 1b[rip]\n" , reg);
		  fprintf (stream: file, format: "\tadd\t%s, r11\n" , reg);
		  fprintf (stream: file, format: "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n" ,
			   mcount_name);
		  fprintf (stream: file, format: "\tadd\t%s, r11\n" , reg);
		  fprintf (stream: file, format: "\tcall\t%s\n" , reg);
		  break;
		}
	      fprintf (stream: file,
		       format: "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n" );
	      fprintf (stream: file, format: "\tleaq\t1b(%%rip), %%%s\n" , reg);
	      fprintf (stream: file, format: "\taddq\t%%r11, %%%s\n" , reg);
	      fprintf (stream: file, format: "\tmovabsq\t$%s@PLTOFF, %%r11\n" , mcount_name);
	      fprintf (stream: file, format: "\taddq\t%%r11, %%%s\n" , reg);
	      fprintf (stream: file, format: "\tcall\t*%%%s\n" , reg);
#else
	      /* Profile counters would also need %r11; unsupported.  */
	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported" );
#endif
	      break;
	    case CM_SMALL_PIC:
	    case CM_MEDIUM_PIC:
	      if (!flag_plt)
		{
		  /* -fno-plt: call through the GOT entry directly.  */
		  if (ASSEMBLER_DIALECT == ASM_INTEL)
		    fprintf (stream: file, format: "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n" ,
			     label, mcount_name);
		  else
		    fprintf (stream: file, format: "%s\tcall\t*%s@GOTPCREL(%%rip)\n" ,
			     label, mcount_name);
		  break;
		}
	      /* fall through */
	    default:
	      x86_print_call_or_nop (file, target: mcount_name, label);
	      break;
	    }
	}
      else
	x86_print_call_or_nop (file, target: mcount_name, label);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      /* 32-bit PIC: counter address is GOT-relative off %ebx.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file,
		 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n" ,
		 LPREFIX, labelno);
      else
	fprintf (file,
		 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n" ,
		 LPREFIX, labelno);
#endif
      if (flag_plt)
	x86_print_call_or_nop (file, target: mcount_name, label);
      else if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (stream: file, format: "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n" ,
		 label, mcount_name);
      else
	fprintf (stream: file, format: "%s\tcall\t*%s@GOT(%%ebx)\n" ,
		 label, mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      /* Non-PIC 32-bit: counter address is an absolute immediate.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file,
		 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n" ,
		 LPREFIX, labelno);
      else
	fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n" ,
		 LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, target: mcount_name, label);
    }

  if (fentry_section_p)
    {
      /* Record the address of the "1:" label emitted above in the
	 requested section (default __mcount_loc) — the format used by
	 tools such as the Linux kernel's ftrace; NOTE(review): confirm
	 consumer expectations before changing.  */
      const char *sname = "__mcount_loc" ;

      if (current_fentry_section (name: &sname))
	;
      else if (fentry_section)
	sname = fentry_section;

      fprintf (stream: file, format: "\t.section %s, \"a\",@progbits\n" , sname);
      fprintf (stream: file, format: "\t.%s 1b\n" , TARGET_64BIT ? "quad" : "long" );
      fprintf (stream: file, format: "\t.previous\n" );
    }
}
| 24031 | |
| 24032 | /* We don't have exact information about the insn sizes, but we may assume |
| 24033 | quite safely that we are informed about all 1 byte insns and memory |
| 24034 | address sizes. This is enough to eliminate unnecessary padding in |
| 24035 | 99% of cases. */ |
| 24036 | |
| 24037 | int |
| 24038 | ix86_min_insn_size (rtx_insn *insn) |
| 24039 | { |
| 24040 | int l = 0, len; |
| 24041 | |
| 24042 | if (!INSN_P (insn) || !active_insn_p (insn)) |
| 24043 | return 0; |
| 24044 | |
| 24045 | /* Discard alignments we've emit and jump instructions. */ |
| 24046 | if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE |
| 24047 | && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) |
| 24048 | return 0; |
| 24049 | |
| 24050 | /* Important case - calls are always 5 bytes. |
| 24051 | It is common to have many calls in the row. */ |
| 24052 | if (CALL_P (insn) |
| 24053 | && symbolic_reference_mentioned_p (op: PATTERN (insn)) |
| 24054 | && !SIBLING_CALL_P (insn)) |
| 24055 | return 5; |
| 24056 | len = get_attr_length (insn); |
| 24057 | if (len <= 1) |
| 24058 | return 1; |
| 24059 | |
| 24060 | /* For normal instructions we rely on get_attr_length being exact, |
| 24061 | with a few exceptions. */ |
| 24062 | if (!JUMP_P (insn)) |
| 24063 | { |
| 24064 | enum attr_type type = get_attr_type (insn); |
| 24065 | |
| 24066 | switch (type) |
| 24067 | { |
| 24068 | case TYPE_MULTI: |
| 24069 | if (GET_CODE (PATTERN (insn)) == ASM_INPUT |
| 24070 | || asm_noperands (PATTERN (insn)) >= 0) |
| 24071 | return 0; |
| 24072 | break; |
| 24073 | case TYPE_OTHER: |
| 24074 | case TYPE_FCMP: |
| 24075 | break; |
| 24076 | default: |
| 24077 | /* Otherwise trust get_attr_length. */ |
| 24078 | return len; |
| 24079 | } |
| 24080 | |
| 24081 | l = get_attr_length_address (insn); |
| 24082 | if (l < 4 && symbolic_reference_mentioned_p (op: PATTERN (insn))) |
| 24083 | l = 4; |
| 24084 | } |
| 24085 | if (l) |
| 24086 | return 1+l; |
| 24087 | else |
| 24088 | return 2; |
| 24089 | } |
| 24090 | |
| 24091 | #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN |
| 24092 | |
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Walk the insn stream keeping a sliding window of minimal
   insn sizes and emit a max-skip alignment directive before any
   fourth jump that could share a 16-byte page with three others.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  /* Whether the insn most recently dropped from the window's tail was
     itself a jump/call.  */
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  /* A label may force alignment, which resets how much of the
	     current window can still share INSN's 16-byte page.  */
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (stream: dump_file, format: "Label %i with max_skip %i\n" ,
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      /* Shrink the window from the tail until it fits in the
		 page even after the maximal skip.  */
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (insn: start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (insn: start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (stream: dump_file, format: "Insn %i estimated to %i bytes\n" ,
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      /* Keep at most 3 jumps in the window by advancing its tail.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (insn: start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (insn: start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (stream: dump_file, format: "Interval %i to %i has %i bytes\n" ,
		 INSN_UID (insn: start), INSN_UID (insn), nbytes);

      /* INSN would be the 4th jump in a 16-byte page (the 4th, just
	 dropped, was a jump and the remaining 3 plus INSN fit in under
	 16 bytes): pad so INSN starts in a fresh page.  */
      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (stream: dump_file, format: "Padding insn %i by %i bytes!\n" ,
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
	}
    }
}
| 24191 | #endif |
| 24192 | |
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Every predecessor of the exit block may end in a return insn.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      /* Only consider blocks that end in a return, and leave blocks
	 optimized for size alone (the long return is bigger).  */
      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      /* Walk backwards to the nearest active insn or label.  */
      for (prev = PREV_INSN (insn: ret); prev; prev = PREV_INSN (insn: prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* The return directly follows a label, so it can be reached
	     by a jump: pad if any non-fallthru edge with nonzero
	     frequency enters this block (index >= 0 excludes the
	     entry block).  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  /* Pad when the return is directly preceded by a conditional
	     jump or a call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  /* Swap the plain return for the padded long form.  */
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
| 24248 | |
| 24249 | /* Count the minimum number of instructions in BB. Return 4 if the |
| 24250 | number of instructions >= 4. */ |
| 24251 | |
| 24252 | static int |
| 24253 | ix86_count_insn_bb (basic_block bb) |
| 24254 | { |
| 24255 | rtx_insn *insn; |
| 24256 | int insn_count = 0; |
| 24257 | |
| 24258 | /* Count number of instructions in this block. Return 4 if the number |
| 24259 | of instructions >= 4. */ |
| 24260 | FOR_BB_INSNS (bb, insn) |
| 24261 | { |
| 24262 | /* Only happen in exit blocks. */ |
| 24263 | if (JUMP_P (insn) |
| 24264 | && ANY_RETURN_P (PATTERN (insn))) |
| 24265 | break; |
| 24266 | |
| 24267 | if (NONDEBUG_INSN_P (insn) |
| 24268 | && GET_CODE (PATTERN (insn)) != USE |
| 24269 | && GET_CODE (PATTERN (insn)) != CLOBBER) |
| 24270 | { |
| 24271 | insn_count++; |
| 24272 | if (insn_count >= 4) |
| 24273 | return insn_count; |
| 24274 | } |
| 24275 | } |
| 24276 | |
| 24277 | return insn_count; |
| 24278 | } |
| 24279 | |
| 24280 | |
| 24281 | /* Count the minimum number of instructions in code path in BB. |
| 24282 | Return 4 if the number of instructions >= 4. */ |
| 24283 | |
| 24284 | static int |
| 24285 | ix86_count_insn (basic_block bb) |
| 24286 | { |
| 24287 | edge e; |
| 24288 | edge_iterator ei; |
| 24289 | int min_prev_count; |
| 24290 | |
| 24291 | /* Only bother counting instructions along paths with no |
| 24292 | more than 2 basic blocks between entry and exit. Given |
| 24293 | that BB has an edge to exit, determine if a predecessor |
| 24294 | of BB has an edge from entry. If so, compute the number |
| 24295 | of instructions in the predecessor block. If there |
| 24296 | happen to be multiple such blocks, compute the minimum. */ |
| 24297 | min_prev_count = 4; |
| 24298 | FOR_EACH_EDGE (e, ei, bb->preds) |
| 24299 | { |
| 24300 | edge prev_e; |
| 24301 | edge_iterator prev_ei; |
| 24302 | |
| 24303 | if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
| 24304 | { |
| 24305 | min_prev_count = 0; |
| 24306 | break; |
| 24307 | } |
| 24308 | FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) |
| 24309 | { |
| 24310 | if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
| 24311 | { |
| 24312 | int count = ix86_count_insn_bb (bb: e->src); |
| 24313 | if (count < min_prev_count) |
| 24314 | min_prev_count = count; |
| 24315 | break; |
| 24316 | } |
| 24317 | } |
| 24318 | } |
| 24319 | |
| 24320 | if (min_prev_count < 4) |
| 24321 | min_prev_count += ix86_count_insn_bb (bb); |
| 24322 | |
| 24323 | return min_prev_count; |
| 24324 | } |
| 24325 | |
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  /* Check every block that flows into the exit block.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (bb: e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      /* No epilogue note: fall back to inserting just before
		 the return insn itself.  */
	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
| 24362 | |
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (insn: next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      /* The nop separates the throwing insn from the epilogue, so the
	 unwinder does not treat the EH region as epilogue code.  */
      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn* insn, *start = get_insns ();
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
	 other, just emulate for pipeline) before stalled load, stlf stall
	 case is as fast as no stall cases on CLX.
	 Since CFG is freed before machine_reorg, just do a rough
	 calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
	return;

      /* Stop at any control transfer: past it the linear-layout window
	 estimate above is meaningless.  */
      if (any_uncondjump_p (insn)
	  || ANY_RETURN_P (PATTERN (insn))
	  || CALL_P (insn))
	return;

      rtx set = single_set (insn);
      if (!set)
	continue;
      rtx src = SET_SRC (set);
      if (!MEM_P (src)
	  /* Only handle V2DFmode load since it doesn't need any scratch
	     register.  */
	  || GET_MODE (src) != E_V2DFmode
	  || !MEM_EXPR (src)
	  || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
	continue;

      /* Split the 16-byte vector load into a low-half and a high-half
	 DFmode load.  The low-half load is emitted before INSN...  */
      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs (s: "Due to potential STLF stall, split instruction:\n" ,
		 stream: dump_file);
	  print_rtl_single (dump_file, insn);
	  fputs (s: "To:\n" , stream: dump_file);
	  print_rtl_single (dump_file, loadlpd);
	  print_rtl_single (dump_file, loadhpd);
	}
      /* ...and INSN itself is rewritten in place into the high-half
	 load, then re-recognized.  */
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
| 24459 | |
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* The SEH unwinder fixup is a correctness fix, so it runs regardless
     of optimization level.  */
  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
	ix86_split_stlf_stall_load ();
      /* Short-function padding takes precedence over return padding:
	 at most one of the two runs.  */
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
| 24486 | |
| 24487 | /* Return nonzero when QImode register that must be represented via REX prefix |
| 24488 | is used. */ |
| 24489 | bool |
| 24490 | x86_extended_QIreg_mentioned_p (rtx_insn *insn) |
| 24491 | { |
| 24492 | int i; |
| 24493 | extract_insn_cached (insn); |
| 24494 | for (i = 0; i < recog_data.n_operands; i++) |
| 24495 | if (GENERAL_REG_P (recog_data.operand[i]) |
| 24496 | && !QI_REGNO_P (REGNO (recog_data.operand[i]))) |
| 24497 | return true; |
| 24498 | return false; |
| 24499 | } |
| 24500 | |
| 24501 | /* Return true when INSN mentions register that must be encoded using REX |
| 24502 | prefix. */ |
| 24503 | bool |
| 24504 | x86_extended_reg_mentioned_p (rtx insn) |
| 24505 | { |
| 24506 | subrtx_iterator::array_type array; |
| 24507 | FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) |
| 24508 | { |
| 24509 | const_rtx x = *iter; |
| 24510 | if (REG_P (x) |
| 24511 | && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)) |
| 24512 | || REX2_INT_REGNO_P (REGNO (x)))) |
| 24513 | return true; |
| 24514 | } |
| 24515 | return false; |
| 24516 | } |
| 24517 | |
| 24518 | /* Return true when INSN mentions register that must be encoded using REX2 |
| 24519 | prefix. */ |
| 24520 | bool |
| 24521 | x86_extended_rex2reg_mentioned_p (rtx insn) |
| 24522 | { |
| 24523 | subrtx_iterator::array_type array; |
| 24524 | FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) |
| 24525 | { |
| 24526 | const_rtx x = *iter; |
| 24527 | if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x))) |
| 24528 | return true; |
| 24529 | } |
| 24530 | return false; |
| 24531 | } |
| 24532 | |
| 24533 | /* Return true when rtx operands mentions register that must be encoded using |
| 24534 | evex prefix. */ |
| 24535 | bool |
| 24536 | x86_evex_reg_mentioned_p (rtx operands[], int nops) |
| 24537 | { |
| 24538 | int i; |
| 24539 | for (i = 0; i < nops; i++) |
| 24540 | if (EXT_REX_SSE_REG_P (operands[i]) |
| 24541 | || x86_extended_rex2reg_mentioned_p (insn: operands[i])) |
| 24542 | return true; |
| 24543 | return false; |
| 24544 | } |
| 24545 | |
| 24546 | /* If profitable, negate (without causing overflow) integer constant |
| 24547 | of mode MODE at location LOC. Return true in this case. */ |
| 24548 | bool |
| 24549 | x86_maybe_negate_const_int (rtx *loc, machine_mode mode) |
| 24550 | { |
| 24551 | HOST_WIDE_INT val; |
| 24552 | |
| 24553 | if (!CONST_INT_P (*loc)) |
| 24554 | return false; |
| 24555 | |
| 24556 | switch (mode) |
| 24557 | { |
| 24558 | case E_DImode: |
| 24559 | /* DImode x86_64 constants must fit in 32 bits. */ |
| 24560 | gcc_assert (x86_64_immediate_operand (*loc, mode)); |
| 24561 | |
| 24562 | mode = SImode; |
| 24563 | break; |
| 24564 | |
| 24565 | case E_SImode: |
| 24566 | case E_HImode: |
| 24567 | case E_QImode: |
| 24568 | break; |
| 24569 | |
| 24570 | default: |
| 24571 | gcc_unreachable (); |
| 24572 | } |
| 24573 | |
| 24574 | /* Avoid overflows. */ |
| 24575 | if (mode_signbit_p (mode, *loc)) |
| 24576 | return false; |
| 24577 | |
| 24578 | val = INTVAL (*loc); |
| 24579 | |
| 24580 | /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. |
| 24581 | Exceptions: -128 encodes smaller than 128, so swap sign and op. */ |
| 24582 | if ((val < 0 && val != -128) |
| 24583 | || val == 128) |
| 24584 | { |
| 24585 | *loc = GEN_INT (-val); |
| 24586 | return true; |
| 24587 | } |
| 24588 | |
| 24589 | return false; |
| 24590 | } |
| 24591 | |
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* If the input is non-negative when viewed as signed, a plain
     signed conversion is correct.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Top bit set: compute (in >> 1) | (in & 1).  Halving brings the
     value into signed range; OR-ing back the low bit rounds to odd so
     the final doubling rounds the same way a direct conversion
     would.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  /* Convert the halved value, then double the result.  */
  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
| 24633 | |
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
ix86_invalid_conversion (const_tree fromtype, const_tree totype)
{
  machine_mode from_mode = element_mode (fromtype);
  machine_mode to_mode = element_mode (totype);

  if (!TARGET_SSE2 && from_mode != to_mode)
    {
      /* Do no allow conversions to/from BFmode/HFmode scalar types
	 when TARGET_SSE2 is not available.  */
      if (from_mode == BFmode)
	return N_("invalid conversion from type %<__bf16%> "
		  "without option %<-msse2%>" );
      if (from_mode == HFmode)
	return N_("invalid conversion from type %<_Float16%> "
		  "without option %<-msse2%>" );
      if (to_mode == BFmode)
	return N_("invalid conversion to type %<__bf16%> "
		  "without option %<-msse2%>" );
      if (to_mode == HFmode)
	return N_("invalid conversion to type %<_Float16%> "
		  "without option %<-msse2%>" );
    }

  /* Warn for silent implicit conversion between __bf16 and short,
     since __bfloat16 is refined as real __bf16 instead of short
     since GCC13.  */
  if (element_mode (fromtype) != element_mode (totype)
      && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
    {
      /* Warn for silent implicit conversion where user may expect
	 a bitcast.  Note: warns (does not reject), hence NULL is
	 still returned below.  */
      if ((TYPE_MODE (fromtype) == BFmode
	   && TYPE_MODE (totype) == HImode)
	  || (TYPE_MODE (totype) == BFmode
	      && TYPE_MODE (fromtype) == HImode))
	warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
		"to real %<__bf16%> since GCC 13.1, be careful of "
		"implicit conversion between %<__bf16%> and %<short%>; "
		"an explicit bitcast may be needed here" );
    }

  /* Conversion allowed.  */
  return NULL;
}
| 24682 | |
| 24683 | /* Return the diagnostic message string if the unary operation OP is |
| 24684 | not permitted on TYPE, NULL otherwise. */ |
| 24685 | |
| 24686 | static const char * |
| 24687 | ix86_invalid_unary_op (int op, const_tree type) |
| 24688 | { |
| 24689 | machine_mode mmode = element_mode (type); |
| 24690 | /* Reject all single-operand operations on BFmode/HFmode except for & |
| 24691 | when TARGET_SSE2 is not available. */ |
| 24692 | if (!TARGET_SSE2 && op != ADDR_EXPR) |
| 24693 | { |
| 24694 | if (mmode == BFmode) |
| 24695 | return N_("operation not permitted on type %<__bf16%> " |
| 24696 | "without option %<-msse2%>" ); |
| 24697 | if (mmode == HFmode) |
| 24698 | return N_("operation not permitted on type %<_Float16%> " |
| 24699 | "without option %<-msse2%>" ); |
| 24700 | } |
| 24701 | |
| 24702 | /* Operation allowed. */ |
| 24703 | return NULL; |
| 24704 | } |
| 24705 | |
| 24706 | /* Return the diagnostic message string if the binary operation OP is |
| 24707 | not permitted on TYPE1 and TYPE2, NULL otherwise. */ |
| 24708 | |
| 24709 | static const char * |
| 24710 | ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, |
| 24711 | const_tree type2) |
| 24712 | { |
| 24713 | machine_mode type1_mode = element_mode (type1); |
| 24714 | machine_mode type2_mode = element_mode (type2); |
| 24715 | /* Reject all 2-operand operations on BFmode or HFmode |
| 24716 | when TARGET_SSE2 is not available. */ |
| 24717 | if (!TARGET_SSE2) |
| 24718 | { |
| 24719 | if (type1_mode == BFmode || type2_mode == BFmode) |
| 24720 | return N_("operation not permitted on type %<__bf16%> " |
| 24721 | "without option %<-msse2%>" ); |
| 24722 | |
| 24723 | if (type1_mode == HFmode || type2_mode == HFmode) |
| 24724 | return N_("operation not permitted on type %<_Float16%> " |
| 24725 | "without option %<-msse2%>" ); |
| 24726 | } |
| 24727 | |
| 24728 | /* Operation allowed. */ |
| 24729 | return NULL; |
| 24730 | } |
| 24731 | |
| 24732 | |
| 24733 | /* Target hook for scalar_mode_supported_p. */ |
| 24734 | static bool |
| 24735 | ix86_scalar_mode_supported_p (scalar_mode mode) |
| 24736 | { |
| 24737 | if (DECIMAL_FLOAT_MODE_P (mode)) |
| 24738 | return default_decimal_float_supported_p (); |
| 24739 | else if (mode == TFmode) |
| 24740 | return true; |
| 24741 | else if (mode == HFmode || mode == BFmode) |
| 24742 | return true; |
| 24743 | else |
| 24744 | return default_scalar_mode_supported_p (mode); |
| 24745 | } |
| 24746 | |
| 24747 | /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE |
| 24748 | if MODE is HFmode, and punt to the generic implementation otherwise. */ |
| 24749 | |
| 24750 | static bool |
| 24751 | ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode) |
| 24752 | { |
| 24753 | /* NB: Always return TRUE for HFmode so that the _Float16 type will |
| 24754 | be defined by the C front-end for AVX512FP16 intrinsics. We will |
| 24755 | issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't |
| 24756 | enabled. */ |
| 24757 | return ((mode == HFmode || mode == BFmode) |
| 24758 | ? true |
| 24759 | : default_libgcc_floating_mode_supported_p (mode)); |
| 24760 | } |
| 24761 | |
| 24762 | /* Implements target hook vector_mode_supported_p. */ |
| 24763 | static bool |
| 24764 | ix86_vector_mode_supported_p (machine_mode mode) |
| 24765 | { |
| 24766 | /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be |
| 24767 | either. */ |
| 24768 | if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode) |
| 24769 | return false; |
| 24770 | if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) |
| 24771 | return true; |
| 24772 | if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) |
| 24773 | return true; |
| 24774 | if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) |
| 24775 | return true; |
| 24776 | if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) |
| 24777 | return true; |
| 24778 | if ((TARGET_MMX || TARGET_MMX_WITH_SSE) |
| 24779 | && VALID_MMX_REG_MODE (mode)) |
| 24780 | return true; |
| 24781 | if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE) |
| 24782 | && VALID_MMX_REG_MODE_3DNOW (mode)) |
| 24783 | return true; |
| 24784 | if (mode == V2QImode) |
| 24785 | return true; |
| 24786 | return false; |
| 24787 | } |
| 24788 | |
| 24789 | /* Target hook for c_mode_for_suffix. */ |
| 24790 | static machine_mode |
| 24791 | ix86_c_mode_for_suffix (char suffix) |
| 24792 | { |
| 24793 | if (suffix == 'q') |
| 24794 | return TFmode; |
| 24795 | if (suffix == 'w') |
| 24796 | return XFmode; |
| 24797 | |
| 24798 | return VOIDmode; |
| 24799 | } |
| 24800 | |
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   gpr to h prefixed.

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"
*/

static void map_egpr_constraints (vec<const char *> &constraints)
{
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      /* asm flag outputs are handled elsewhere; leave them intact.  */
      if (startswith (str: cur, prefix: "=@cc" ))
	continue;

      int len = strlen (s: cur);
      auto_vec<char> buf;

      /* Rebuild the constraint string character by character.  */
      for (int j = 0; j < len; j++)
	{
	  switch (cur[j])
	    {
	    case 'g':
	      /* "g" has no single j-prefixed equivalent; expand to
		 "jrjmi" (reg, mem, immediate).  */
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: 'r');
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: 'm');
	      buf.safe_push (obj: 'i');
	      break;
	    case 'r':
	    case 'm':
	    case '<':
	    case '>':
	    case 'o':
	    case 'V':
	    case 'p':
	      /* Single-character gpr/memory constraints: prefix 'j'.  */
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: cur[j]);
	      break;
	    case 'B':
	      /* "Bm" maps to "ja"; other "B?" pairs copy through.  */
	      if (cur[j + 1] == 'm')
		{
		  buf.safe_push (obj: 'j');
		  buf.safe_push (obj: 'a');
		  j++;
		}
	      else
		{
		  buf.safe_push (obj: cur[j]);
		  buf.safe_push (obj: cur[j + 1]);
		  j++;
		}
	      break;
	    case 'T':
	    case 'Y':
	    case 'W':
	    case 'j':
	      /* Two-character constraint prefixes: copy both chars
		 unchanged.  */
	      buf.safe_push (obj: cur[j]);
	      buf.safe_push (obj: cur[j + 1]);
	      j++;
	      break;
	    case '{':
	      /* Copy an explicit register spec "{...}" verbatim.  */
	      do
		{
		  buf.safe_push (obj: cur[j]);
		} while (cur[j++] != '}');
	      break;
	    default:
	      buf.safe_push (obj: cur[j]);
	      break;
	    }
	}
      buf.safe_push (obj: '\0');
      /* NOTE(review): the xstrdup'd string is not freed here;
	 presumably it lives for the compilation like other constraint
	 strings -- confirm ownership with callers.  */
      constraints[i] = xstrdup (buf.address ());
    }
}
| 24893 | |
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
		    vec<machine_mode> & /*input_modes*/,
		    vec<const char *> &constraints, vec<rtx> &/*uses*/,
		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
		    location_t loc)
{
  bool saw_asm_flag = false;

  start_sequence ();

  /* With APX extended GPRs present but gpr32 disallowed in inline asm,
     rewrite common constraints to their "j"-prefixed gpr16 forms.  */
  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
    map_egpr_constraints (constraints);

  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      /* Only "=@cc<cond>" flag outputs need adjusting.  */
      if (!startswith (str: con, prefix: "=@cc" ))
	continue;
      con += 4;
      if (strchr (s: con, c: ',') != NULL)
	{
	  error_at (loc, "alternatives not allowed in %<asm%> flag output" );
	  continue;
	}

      /* A leading 'n' inverts the condition, e.g. "=@ccnz".  */
      bool invert = false;
      if (con[0] == 'n')
	invert = true, con++;

      /* Decode the condition suffix into a flags-register mode and a
	 comparison code; UNKNOWN means unrecognized suffix.  */
      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      switch (con[0])
	{
	case 'a':
	  if (con[1] == 0)
	    mode = CCAmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCCmode, code = NE;
	  break;
	case 'b':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCAmode, code = NE;
	  break;
	case 'c':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  break;
	case 'e':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	case 'g':
	  if (con[1] == 0)
	    mode = CCGCmode, code = GT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = GE;
	  break;
	case 'l':
	  if (con[1] == 0)
	    mode = CCGCmode, code = LT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = LE;
	  break;
	case 'o':
	  if (con[1] == 0)
	    mode = CCOmode, code = EQ;
	  break;
	case 'p':
	  if (con[1] == 0)
	    mode = CCPmode, code = EQ;
	  break;
	case 's':
	  if (con[1] == 0)
	    mode = CCSmode, code = EQ;
	  break;
	case 'z':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	}
      if (code == UNKNOWN)
	{
	  error_at (loc, "unknown %<asm%> flag output %qs" , constraints[i]);
	  continue;
	}
      if (invert)
	code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=Bf" ;
	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X" ;
	  outputs[i] = gen_rtx_SCRATCH (SImode);
	}

      /* Build the comparison (flags <cond> 0) that extracts the
	 condition from the flags register.  */
      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error_at (loc, "invalid type for %<asm%> flag output" );
	  continue;
	}

      /* Store the flag test into the user's output, zero-extending
	 through a QImode temporary when the output is wider.  */
      if (dest_mode == QImode)
	emit_insn (gen_rtx_SET (dest, x));
      else
	{
	  rtx reg = gen_reg_rtx (QImode);
	  emit_insn (gen_rtx_SET (reg, x));

	  reg = convert_to_mode (dest_mode, reg, 1);
	  emit_move_insn (dest, reg);
	}
    }

  rtx_insn *seq = end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (obj: gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (set&: clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
| 25042 | |
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Mark symbols placed in large data sections so later code knows
     they need far addressing.  */
  if (ix86_in_large_data_p (exp: decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
| 25053 | |
| 25054 | /* Worker function for REVERSE_CONDITION. */ |
| 25055 | |
| 25056 | enum rtx_code |
| 25057 | ix86_reverse_condition (enum rtx_code code, machine_mode mode) |
| 25058 | { |
| 25059 | return (mode == CCFPmode |
| 25060 | ? reverse_condition_maybe_unordered (code) |
| 25061 | : reverse_condition (code)); |
| 25062 | } |
| 25063 | |
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  Returns the assembler template string.  */

const char *
output_387_reg_move (rtx_insn *insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Register-to-register move.  If the source dies here, pop it
	 off the x87 stack while moving.  */
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  /* A dying source moved into %st(0): just drop the top.  */
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, opno: 0);
	  return "fstp\t%y0" ;
	}
      /* Loading into the stack top pushes; otherwise store without
	 popping.  */
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1" ;
      return "fst\t%y0" ;
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      /* Store to memory, popping when the source register dies.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0" ;
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0" ;
	  else
	    return "fst%Z0\t%y0" ;
	}
    }
  else
    gcc_unreachable();
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.
   Emits the .section directive for NAME with FLAGS (a SECTION_* mask)
   for DECL, working around Solaris assembler/linker peculiarities
   before deferring to the generic ELF implementation.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#if !HAVE_GNU_AS
  /* The native Solaris assembler needs its own COMDAT syntax.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
| 25139 | |
| 25140 | /* Return the mangling of TYPE if it is an extended fundamental type. */ |
| 25141 | |
| 25142 | static const char * |
| 25143 | ix86_mangle_type (const_tree type) |
| 25144 | { |
| 25145 | type = TYPE_MAIN_VARIANT (type); |
| 25146 | |
| 25147 | if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE |
| 25148 | && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) |
| 25149 | return NULL; |
| 25150 | |
| 25151 | if (type == float128_type_node || type == float64x_type_node) |
| 25152 | return NULL; |
| 25153 | |
| 25154 | switch (TYPE_MODE (type)) |
| 25155 | { |
| 25156 | case E_BFmode: |
| 25157 | return "DF16b" ; |
| 25158 | case E_HFmode: |
| 25159 | /* _Float16 is "DF16_". |
| 25160 | Align with clang's decision in https://reviews.llvm.org/D33719. */ |
| 25161 | return "DF16_" ; |
| 25162 | case E_TFmode: |
| 25163 | /* __float128 is "g". */ |
| 25164 | return "g" ; |
| 25165 | case E_XFmode: |
| 25166 | /* "long double" or __float80 is "e". */ |
| 25167 | return "e" ; |
| 25168 | default: |
| 25169 | return NULL; |
| 25170 | } |
| 25171 | } |
| 25172 | |
| 25173 | /* Create C++ tinfo symbols for only conditionally available fundamental |
| 25174 | types. */ |
| 25175 | |
| 25176 | static void |
| 25177 | ix86_emit_support_tinfos (emit_support_tinfos_callback callback) |
| 25178 | { |
| 25179 | extern tree ix86_float16_type_node; |
| 25180 | extern tree ix86_bf16_type_node; |
| 25181 | |
| 25182 | if (!TARGET_SSE2) |
| 25183 | { |
| 25184 | if (!float16_type_node) |
| 25185 | float16_type_node = ix86_float16_type_node; |
| 25186 | if (!bfloat16_type_node) |
| 25187 | bfloat16_type_node = ix86_bf16_type_node; |
| 25188 | callback (float16_type_node); |
| 25189 | callback (bfloat16_type_node); |
| 25190 | float16_type_node = NULL_TREE; |
| 25191 | bfloat16_type_node = NULL_TREE; |
| 25192 | } |
| 25193 | } |
| 25194 | |
/* Cached VAR_DECL for a user-specified TLS stack guard symbol, so the
   same declaration is reused across functions.  */
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

/* Implement TARGET_STACK_PROTECT_GUARD.  Return a tree expression that
   loads the stack canary.  With -mstack-protector-guard=tls this is a
   load from %fs/%gs (encoded as an address space) at either a named
   symbol or a fixed offset; otherwise fall back to the generic
   __stack_chk_guard variable.  */

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      /* The segment register (fs/gs) is modelled as an address space
	 qualifier on the guard's type.  */
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
	{
	  /* -mstack-protector-guard-symbol=: build (once) an external
	     volatile VAR_DECL with the requested name.  */
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  /* Default TLS guard: a volatile dereference of the fixed
	     guard offset within the segment's address space.  */
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  return default_stack_protect_guard ();
}
| 25250 | |
| 25251 | static bool |
| 25252 | ix86_stack_protect_runtime_enabled_p (void) |
| 25253 | { |
| 25254 | /* Naked functions should not enable stack protector. */ |
| 25255 | return !ix86_function_naked (fn: current_function_decl); |
| 25256 | } |
| 25257 | |
| 25258 | /* For 32-bit code we can save PIC register setup by using |
| 25259 | __stack_chk_fail_local hidden function instead of calling |
| 25260 | __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC |
| 25261 | register, so it is better to call __stack_chk_fail directly. */ |
| 25262 | |
| 25263 | static tree ATTRIBUTE_UNUSED |
| 25264 | ix86_stack_protect_fail (void) |
| 25265 | { |
| 25266 | return TARGET_64BIT |
| 25267 | ? default_external_stack_protect_fail () |
| 25268 | : default_hidden_stack_protect_fail (); |
| 25269 | } |
| 25270 | |
| 25271 | /* Select a format to encode pointers in exception handling data. CODE |
| 25272 | is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is |
| 25273 | true if the symbol may be affected by dynamic relocations. |
| 25274 | |
| 25275 | ??? All x86 object file formats are capable of representing this. |
| 25276 | After all, the relocation needed is the same as for the call insn. |
| 25277 | Whether or not a particular assembler allows us to enter such, I |
| 25278 | guess we'll have to see. */ |
| 25279 | |
| 25280 | int |
| 25281 | asm_preferred_eh_data_format (int code, int global) |
| 25282 | { |
| 25283 | /* PE-COFF is effectively always -fPIC because of the .reloc section. */ |
| 25284 | if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access) |
| 25285 | { |
| 25286 | int type = DW_EH_PE_sdata8; |
| 25287 | if (ptr_mode == SImode |
| 25288 | || ix86_cmodel == CM_SMALL_PIC |
| 25289 | || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) |
| 25290 | type = DW_EH_PE_sdata4; |
| 25291 | return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; |
| 25292 | } |
| 25293 | |
| 25294 | if (ix86_cmodel == CM_SMALL |
| 25295 | || (ix86_cmodel == CM_MEDIUM && code)) |
| 25296 | return DW_EH_PE_udata4; |
| 25297 | |
| 25298 | return DW_EH_PE_absptr; |
| 25299 | } |
| 25300 | |
| 25301 | /* Worker for ix86_builtin_vectorization_cost and the fallback calls |
| 25302 | from ix86_vector_costs::add_stmt_cost. */ |
| 25303 | static int |
| 25304 | ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost, |
| 25305 | machine_mode mode) |
| 25306 | { |
| 25307 | bool fp = FLOAT_MODE_P (mode); |
| 25308 | int index; |
| 25309 | switch (type_of_cost) |
| 25310 | { |
| 25311 | case scalar_stmt: |
| 25312 | return fp ? ix86_cost->addss : COSTS_N_INSNS (1); |
| 25313 | |
| 25314 | case scalar_load: |
| 25315 | /* load/store costs are relative to register move which is 2. Recompute |
| 25316 | it to COSTS_N_INSNS so everything have same base. */ |
| 25317 | return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0] |
| 25318 | : ix86_cost->int_load [2]) / 2; |
| 25319 | |
| 25320 | case scalar_store: |
| 25321 | return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0] |
| 25322 | : ix86_cost->int_store [2]) / 2; |
| 25323 | |
| 25324 | case vector_stmt: |
| 25325 | return ix86_vec_cost (mode, |
| 25326 | cost: fp ? ix86_cost->addss : ix86_cost->sse_op); |
| 25327 | |
| 25328 | case vector_load: |
| 25329 | index = sse_store_index (mode); |
| 25330 | /* See PR82713 - we may end up being called on non-vector type. */ |
| 25331 | if (index < 0) |
| 25332 | index = 2; |
| 25333 | return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2; |
| 25334 | |
| 25335 | case vector_store: |
| 25336 | index = sse_store_index (mode); |
| 25337 | /* See PR82713 - we may end up being called on non-vector type. */ |
| 25338 | if (index < 0) |
| 25339 | index = 2; |
| 25340 | return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2; |
| 25341 | |
| 25342 | case vec_to_scalar: |
| 25343 | case scalar_to_vec: |
| 25344 | return ix86_vec_cost (mode, cost: ix86_cost->sse_op); |
| 25345 | |
| 25346 | /* We should have separate costs for unaligned loads and gather/scatter. |
| 25347 | Do that incrementally. */ |
| 25348 | case unaligned_load: |
| 25349 | index = sse_store_index (mode); |
| 25350 | /* See PR82713 - we may end up being called on non-vector type. */ |
| 25351 | if (index < 0) |
| 25352 | index = 2; |
| 25353 | return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2; |
| 25354 | |
| 25355 | case unaligned_store: |
| 25356 | index = sse_store_index (mode); |
| 25357 | /* See PR82713 - we may end up being called on non-vector type. */ |
| 25358 | if (index < 0) |
| 25359 | index = 2; |
| 25360 | return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2; |
| 25361 | |
| 25362 | case vector_gather_load: |
| 25363 | return ix86_vec_cost (mode, |
| 25364 | COSTS_N_INSNS |
| 25365 | (ix86_cost->gather_static |
| 25366 | + ix86_cost->gather_per_elt |
| 25367 | * GET_MODE_NUNITS (mode)) / 2); |
| 25368 | |
| 25369 | case vector_scatter_store: |
| 25370 | return ix86_vec_cost (mode, |
| 25371 | COSTS_N_INSNS |
| 25372 | (ix86_cost->scatter_static |
| 25373 | + ix86_cost->scatter_per_elt |
| 25374 | * GET_MODE_NUNITS (mode)) / 2); |
| 25375 | |
| 25376 | case cond_branch_taken: |
| 25377 | return ix86_cost->cond_taken_branch_cost; |
| 25378 | |
| 25379 | case cond_branch_not_taken: |
| 25380 | return ix86_cost->cond_not_taken_branch_cost; |
| 25381 | |
| 25382 | case vec_perm: |
| 25383 | return ix86_vec_cost (mode, cost: ix86_cost->sse_op); |
| 25384 | |
| 25385 | case vec_promote_demote: |
| 25386 | if (fp) |
| 25387 | return vec_fp_conversion_cost (cost: ix86_tune_cost, size: mode); |
| 25388 | return ix86_vec_cost (mode, cost: ix86_cost->sse_op); |
| 25389 | |
| 25390 | case vec_construct: |
| 25391 | { |
| 25392 | int n = GET_MODE_NUNITS (mode); |
| 25393 | /* N - 1 element inserts into an SSE vector, the possible |
| 25394 | GPR -> XMM move is accounted for in add_stmt_cost. */ |
| 25395 | if (GET_MODE_BITSIZE (mode) <= 128) |
| 25396 | return (n - 1) * ix86_cost->sse_op; |
| 25397 | /* One vinserti128 for combining two SSE vectors for AVX256. */ |
| 25398 | else if (GET_MODE_BITSIZE (mode) == 256) |
| 25399 | return ((n - 2) * ix86_cost->sse_op |
| 25400 | + ix86_vec_cost (mode, cost: ix86_cost->sse_op)); |
| 25401 | /* One vinserti64x4 and two vinserti128 for combining SSE |
| 25402 | and AVX256 vectors to AVX512. */ |
| 25403 | else if (GET_MODE_BITSIZE (mode) == 512) |
| 25404 | { |
| 25405 | machine_mode half_mode |
| 25406 | = mode_for_vector (GET_MODE_INNER (mode), |
| 25407 | GET_MODE_NUNITS (mode) / 2).require (); |
| 25408 | return ((n - 4) * ix86_cost->sse_op |
| 25409 | + 2 * ix86_vec_cost (mode: half_mode, cost: ix86_cost->sse_op) |
| 25410 | + ix86_vec_cost (mode, cost: ix86_cost->sse_op)); |
| 25411 | } |
| 25412 | gcc_unreachable (); |
| 25413 | } |
| 25414 | |
| 25415 | default: |
| 25416 | gcc_unreachable (); |
| 25417 | } |
| 25418 | } |
| 25419 | |
| 25420 | /* Implement targetm.vectorize.builtin_vectorization_cost. */ |
| 25421 | static int |
| 25422 | ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, |
| 25423 | tree vectype, int) |
| 25424 | { |
| 25425 | machine_mode mode = TImode; |
| 25426 | if (vectype != NULL) |
| 25427 | mode = TYPE_MODE (vectype); |
| 25428 | return ix86_default_vector_cost (type_of_cost, mode); |
| 25429 | } |
| 25430 | |
| 25431 | |
| 25432 | /* This function returns the calling abi specific va_list type node. |
| 25433 | It returns the FNDECL specific va_list type. */ |
| 25434 | |
| 25435 | static tree |
| 25436 | ix86_fn_abi_va_list (tree fndecl) |
| 25437 | { |
| 25438 | if (!TARGET_64BIT) |
| 25439 | return va_list_type_node; |
| 25440 | gcc_assert (fndecl != NULL_TREE); |
| 25441 | |
| 25442 | if (ix86_function_abi (fndecl: (const_tree) fndecl) == MS_ABI) |
| 25443 | return ms_va_list_type_node; |
| 25444 | else |
| 25445 | return sysv_va_list_type_node; |
| 25446 | } |
| 25447 | |
| 25448 | /* Returns the canonical va_list type specified by TYPE. If there |
| 25449 | is no valid TYPE provided, it return NULL_TREE. */ |
| 25450 | |
| 25451 | static tree |
| 25452 | ix86_canonical_va_list_type (tree type) |
| 25453 | { |
| 25454 | if (TARGET_64BIT) |
| 25455 | { |
| 25456 | if (lookup_attribute (attr_name: "ms_abi va_list" , TYPE_ATTRIBUTES (type))) |
| 25457 | return ms_va_list_type_node; |
| 25458 | |
| 25459 | if ((TREE_CODE (type) == ARRAY_TYPE |
| 25460 | && integer_zerop (array_type_nelts_minus_one (type))) |
| 25461 | || POINTER_TYPE_P (type)) |
| 25462 | { |
| 25463 | tree elem_type = TREE_TYPE (type); |
| 25464 | if (TREE_CODE (elem_type) == RECORD_TYPE |
| 25465 | && lookup_attribute (attr_name: "sysv_abi va_list" , |
| 25466 | TYPE_ATTRIBUTES (elem_type))) |
| 25467 | return sysv_va_list_type_node; |
| 25468 | } |
| 25469 | |
| 25470 | return NULL_TREE; |
| 25471 | } |
| 25472 | |
| 25473 | return std_canonical_va_list_type (type); |
| 25474 | } |
| 25475 | |
| 25476 | /* Iterate through the target-specific builtin types for va_list. |
| 25477 | IDX denotes the iterator, *PTREE is set to the result type of |
| 25478 | the va_list builtin, and *PNAME to its internal type. |
| 25479 | Returns zero if there is no element for this index, otherwise |
| 25480 | IDX should be increased upon the next call. |
| 25481 | Note, do not iterate a base builtin's name like __builtin_va_list. |
| 25482 | Used from c_common_nodes_and_builtins. */ |
| 25483 | |
| 25484 | static int |
| 25485 | ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
| 25486 | { |
| 25487 | if (TARGET_64BIT) |
| 25488 | { |
| 25489 | switch (idx) |
| 25490 | { |
| 25491 | default: |
| 25492 | break; |
| 25493 | |
| 25494 | case 0: |
| 25495 | *ptree = ms_va_list_type_node; |
| 25496 | *pname = "__builtin_ms_va_list" ; |
| 25497 | return 1; |
| 25498 | |
| 25499 | case 1: |
| 25500 | *ptree = sysv_va_list_type_node; |
| 25501 | *pname = "__builtin_sysv_va_list" ; |
| 25502 | return 1; |
| 25503 | } |
| 25504 | } |
| 25505 | |
| 25506 | return 0; |
| 25507 | } |
| 25508 | |
| 25509 | #undef TARGET_SCHED_DISPATCH |
| 25510 | #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch |
| 25511 | #undef TARGET_SCHED_DISPATCH_DO |
| 25512 | #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch |
| 25513 | #undef TARGET_SCHED_REASSOCIATION_WIDTH |
| 25514 | #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width |
| 25515 | #undef TARGET_SCHED_REORDER |
| 25516 | #define TARGET_SCHED_REORDER ix86_atom_sched_reorder |
| 25517 | #undef TARGET_SCHED_ADJUST_PRIORITY |
| 25518 | #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority |
| 25519 | #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK |
| 25520 | #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \ |
| 25521 | ix86_dependencies_evaluation_hook |
| 25522 | |
| 25523 | |
| 25524 | /* Implementation of reassociation_width target hook used by |
| 25525 | reassoc phase to identify parallelism level in reassociated |
| 25526 | tree. Statements tree_code is passed in OPC. Arguments type |
| 25527 | is passed in MODE. */ |
| 25528 | |
| 25529 | static int |
| 25530 | ix86_reassociation_width (unsigned int op, machine_mode mode) |
| 25531 | { |
| 25532 | int width = 1; |
| 25533 | /* Vector part. */ |
| 25534 | if (VECTOR_MODE_P (mode)) |
| 25535 | { |
| 25536 | int div = 1; |
| 25537 | if (INTEGRAL_MODE_P (mode)) |
| 25538 | width = ix86_cost->reassoc_vec_int; |
| 25539 | else if (FLOAT_MODE_P (mode)) |
| 25540 | width = ix86_cost->reassoc_vec_fp; |
| 25541 | |
| 25542 | if (width == 1) |
| 25543 | return 1; |
| 25544 | |
| 25545 | /* Znver1-4 Integer vector instructions execute in FP unit |
| 25546 | and can execute 3 additions and one multiplication per cycle. */ |
| 25547 | if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2 |
| 25548 | || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4) |
| 25549 | && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) |
| 25550 | return 1; |
| 25551 | /* Znver5 can do 2 integer multiplications per cycle with latency |
| 25552 | of 3. */ |
| 25553 | if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6) |
| 25554 | && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) |
| 25555 | width = 6; |
| 25556 | |
| 25557 | /* Account for targets that splits wide vectors into multiple parts. */ |
| 25558 | if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256) |
| 25559 | div = GET_MODE_BITSIZE (mode) / 256; |
| 25560 | else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128) |
| 25561 | div = GET_MODE_BITSIZE (mode) / 128; |
| 25562 | else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64) |
| 25563 | div = GET_MODE_BITSIZE (mode) / 64; |
| 25564 | width = (width + div - 1) / div; |
| 25565 | } |
| 25566 | /* Scalar part. */ |
| 25567 | else if (INTEGRAL_MODE_P (mode)) |
| 25568 | width = ix86_cost->reassoc_int; |
| 25569 | else if (FLOAT_MODE_P (mode)) |
| 25570 | width = ix86_cost->reassoc_fp; |
| 25571 | |
| 25572 | /* Avoid using too many registers in 32bit mode. */ |
| 25573 | if (!TARGET_64BIT && width > 2) |
| 25574 | width = 2; |
| 25575 | return width; |
| 25576 | } |
| 25577 | |
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE: return the widest
   enabled (and not explicitly dispreferred) vector mode whose element
   mode is MODE, or word_mode when no suitable vector ISA exists.  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      /* 8-bit elements need AVX512BW for the 512-bit variant.  */
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V32QImode;
      else
	return V16QImode;

    case E_HImode:
      /* Likewise for 16-bit integer elements.  */
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16HImode;
      else
	return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SImode;
      else
	return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DImode;
      else
	return V2DImode;

    case E_HFmode:
      /* _Float16 vectors exist only with AVX512FP16; the narrower
	 variants additionally need AVX512VL.  */
      if (TARGET_AVX512FP16)
	{
	  if (TARGET_AVX512VL)
	    {
	      if (TARGET_PREFER_AVX128)
		return V8HFmode;
	      else if (TARGET_PREFER_AVX256)
		return V16HFmode;
	    }
	  return V32HFmode;
	}
      return word_mode;

    case E_BFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V32BFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16BFmode;
      else
	return V8BFmode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
| 25664 | |
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  MODES is filled in preference order;
   when ALL is set the vectorizer wants every usable size, so the
   otherwise-dispreferred widths are appended last.  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    {
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && all)
    {
      /* 512-bit is available but dispreferred: try it only after the
	 preferred 256/128-bit sizes.  */
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      /* Likewise, dispreferred 256-bit goes after 128-bit.  */
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_SSE2)
    modes->safe_push (V16QImode);

  /* Sub-128-bit modes backed by SSE registers.  */
  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  if (TARGET_SSE2)
    modes->safe_push (V4QImode);

  return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
}
| 25705 | |
/* Implemenation of targetm.vectorize.get_mask_mode.  Return the mode
   to be used for a vector comparison mask on DATA_MODE: an AVX512
   scalar kmask (integer) mode when the ISA supports it, otherwise a
   same-sized integer vector mode.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
      /* AVX512FP16 only supports vector comparison
	 to kmask for _Float16.  */
      || (TARGET_AVX512VL && TARGET_AVX512FP16
	  && GET_MODE_INNER (data_mode) == E_HFmode)
      || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
    {
      /* 1/2-byte elements need AVX512BW for kmask compares.  */
      if (elem_size == 4
	  || elem_size == 8
	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
	return smallest_int_mode_for_size (size: nunits).require ();
    }

  /* Vector mask: an integer vector with one same-width integer lane
     per data lane.  */
  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (size: elem_size * BITS_PER_UNIT).require ();

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
| 25737 | |
| 25738 | |
| 25739 | |
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  /* NOTE: the leading "0 &&" intentionally disables this hook (see the
     PRs above); the remaining condition is kept for reference should
     SSE spilling of integer regs ever be re-enabled.  */
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
| 25758 | |
| 25759 | /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation, |
| 25760 | but returns a lower bound. */ |
| 25761 | |
| 25762 | static unsigned int |
| 25763 | ix86_max_noce_ifcvt_seq_cost (edge e) |
| 25764 | { |
| 25765 | bool predictable_p = predictable_edge_p (e); |
| 25766 | if (predictable_p) |
| 25767 | { |
| 25768 | if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost)) |
| 25769 | return param_max_rtl_if_conversion_predictable_cost; |
| 25770 | } |
| 25771 | else |
| 25772 | { |
| 25773 | if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost)) |
| 25774 | return param_max_rtl_if_conversion_unpredictable_cost; |
| 25775 | } |
| 25776 | |
| 25777 | /* For modern machines with deeper pipeline, the penalty for branch |
| 25778 | misprediction could be higher than before to reset the pipeline |
| 25779 | slots. Add parameter br_mispredict_scale as a factor to describe |
| 25780 | the impact of reseting the pipeline. */ |
| 25781 | |
| 25782 | return BRANCH_COST (true, predictable_p) |
| 25783 | * ix86_tune_cost->br_mispredict_scale; |
| 25784 | } |
| 25785 | |
/* Return true if SEQ is a good candidate as a replacement for the
   if-convertible sequence described in IF_INFO.  */

static bool
ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    {
      int cmov_cnt = 0;
      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
	 Maybe we should allow even more conditional moves as long as they
	 are used far enough not to stall the CPU, or also consider
	 IF_INFO->TEST_BB succ edge probabilities.  */
      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
	{
	  rtx set = single_set (insn);
	  if (!set)
	    continue;
	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
	    continue;
	  rtx src = SET_SRC (set);
	  machine_mode mode = GET_MODE (src);
	  if (GET_MODE_CLASS (mode) != MODE_INT
	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
	    continue;
	  /* CMOV/FCMOV arms must be register or memory operands; other
	     IF_THEN_ELSE sets are not conditional moves.  */
	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
	    continue;
	  /* insn is CMOV or FCMOV.  */
	  if (++cmov_cnt > 1)
	    return false;
	}
    }

  /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
     for movdfcc/movsfcc, and could possibly fail cost comparison.
     Increase branch cost will hurt performance for other modes, so
     specially add some preference for floating point ifcvt.  */
  if (!TARGET_SSE4_1 && if_info->x
      && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
      && if_info->speed_p)
    {
      unsigned cost = seq_cost (seq, true);

      if (cost <= if_info->original_cost)
	return true;

      /* Allow a small slack over the generic limit for FP ifcvt.  */
      return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
    }

  return default_noce_conversion_profitable_p (seq, if_info);
}
| 25838 | |
/* x86-specific vector costs.  Accumulates per-statement costs during
   vectorization and adjusts the final estimate for x86 specifics in
   finish_cost.  */
class ix86_vector_costs : public vector_costs
{
public:
  ix86_vector_costs (vec_info *, bool);

  /* Record COUNT occurrences of cost kind KIND at location WHERE;
     returns the cost added.  */
  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
			      stmt_vec_info stmt_info, slp_tree node,
			      tree vectype, int misalign,
			      vect_cost_model_location where) override;
  /* Finalize the accumulated costs, comparing against the scalar
     variant's costs where given.  */
  void finish_cost (const vector_costs *) override;

private:

  /* Estimate register pressure of the vectorized code.  */
  void ix86_vect_estimate_reg_pressure ();
  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
     estimation of register pressure.
     ??? Currently it's only used by vec_construct/scalar_to_vec
     where we know it's not loaded from memory.
     Indexed by vect_cost_model_location (prologue/body/epilogue).  */
  unsigned m_num_gpr_needed[3];
  unsigned m_num_sse_needed[3];
  /* Number of 256-bit vector permutation.  */
  unsigned m_num_avx256_vec_perm[3];
  /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR */
  unsigned m_num_reduc[X86_REDUC_LAST];
  /* Don't do unroll if m_prefer_unroll is false, default is true.  */
  bool m_prefer_unroll;
};
| 25868 | |
/* Constructor: zero all counters and default to preferring unroll;
   the base class records VINFO and whether we cost the scalar
   variant.  */

ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
  : vector_costs (vinfo, costing_for_scalar),
    m_num_gpr_needed (),
    m_num_sse_needed (),
    m_num_avx256_vec_perm (),
    m_num_reduc (),
    m_prefer_unroll (true)
{}
| 25877 | |
| 25878 | /* Implement targetm.vectorize.create_costs. */ |
| 25879 | |
| 25880 | static vector_costs * |
| 25881 | ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) |
| 25882 | { |
| 25883 | return new ix86_vector_costs (vinfo, costing_for_scalar); |
| 25884 | } |
| 25885 | |
/* Implement vector_costs::add_stmt_cost.  Account COUNT copies of a
   statement of kind KIND (described by STMT_INFO, NODE and VECTYPE) to
   the cost bucket WHERE and return the cost recorded.  Besides the raw
   cost this also updates the bookkeeping used by finish_cost:
   m_num_reduc, m_prefer_unroll, m_num_gpr_needed/m_num_sse_needed and
   m_num_avx256_vec_perm.  */

unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
				  stmt_vec_info stmt_info, slp_tree node,
				  tree vectype, int,
				  vect_cost_model_location where)
{
  unsigned retval = 0;
  bool scalar_p
    = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
  /* -1 means "not computed"; the generic table cost is filled in at
     the end when no case below provided a better estimate.  */
  int stmt_cost = - 1;

  bool fp = false;
  machine_mode mode = scalar_p ? SImode : TImode;

  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
      if (scalar_p)
	mode = TYPE_MODE (TREE_TYPE (vectype));
    }
  /* When we are costing a scalar stmt use the scalar stmt to get at the
     type of the operation.  */
  else if (scalar_p && stmt_info)
    if (tree lhs = gimple_get_lhs (stmt_info->stmt))
      {
	fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
	mode = TYPE_MODE (TREE_TYPE (lhs));
      }

  /* Cost GIMPLE assignments by the rhs code of the statement.  */
  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt && gimple_code (g: stmt_info->stmt) == GIMPLE_ASSIGN)
    {
      tree_code subcode = gimple_assign_rhs_code (gs: stmt_info->stmt);
      /*machine_mode inner_mode = mode;
      if (VECTOR_MODE_P (mode))
	inner_mode = GET_MODE_INNER (mode);*/

      switch (subcode)
	{
	case PLUS_EXPR:
	case POINTER_PLUS_EXPR:
	case MINUS_EXPR:
	  if (kind == scalar_stmt)
	    {
	      if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
		stmt_cost = ix86_cost->addss;
	      else if (X87_FLOAT_MODE_P (mode))
		stmt_cost = ix86_cost->fadd;
	      else
		stmt_cost = ix86_cost->add;
	    }
	  else
	    stmt_cost = ix86_vec_cost (mode, cost: fp ? ix86_cost->addss
					     : ix86_cost->sse_op);
	  break;

	case MULT_EXPR:
	  /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
	     take it as MULT_EXPR.  */
	case MULT_HIGHPART_EXPR:
	  stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode);
	  break;
	  /* There's no direct instruction for WIDEN_MULT_EXPR,
	     take emulation into account.  */
	case WIDEN_MULT_EXPR:
	  stmt_cost = ix86_widen_mult_cost (cost: ix86_cost, mode,
					    TYPE_UNSIGNED (vectype));
	  break;

	case NEGATE_EXPR:
	  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	    stmt_cost = ix86_cost->sse_op;
	  else if (X87_FLOAT_MODE_P (mode))
	    stmt_cost = ix86_cost->fchs;
	  else if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case TRUNC_MOD_EXPR:
	case CEIL_MOD_EXPR:
	case FLOOR_MOD_EXPR:
	case RDIV_EXPR:
	case ROUND_MOD_EXPR:
	case EXACT_DIV_EXPR:
	  stmt_cost = ix86_division_cost (cost: ix86_cost, mode);
	  break;

	case RSHIFT_EXPR:
	case LSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  {
	    /* Use an arithmetic shift only for signed right shifts;
	       everything else is costed as a logical shift/rotate.  */
	    tree op1 = gimple_assign_rhs1 (gs: stmt_info->stmt);
	    tree op2 = gimple_assign_rhs2 (gs: stmt_info->stmt);
	    stmt_cost = ix86_shift_rotate_cost
			   (cost: ix86_cost,
			    code: (subcode == RSHIFT_EXPR
				  && !TYPE_UNSIGNED (TREE_TYPE (op1)))
				 ? ASHIFTRT : LSHIFTRT, mode,
			    TREE_CODE (op2) == INTEGER_CST,
			    op1_val: cst_and_fits_in_hwi (op2)
				     ? int_cst_value (op2) : -1,
			    and_in_op1: false, shift_and_truncate: false, NULL, NULL);
	  }
	  break;
	case NOP_EXPR:
	  /* Only sign-conversions are free.  */
	  if (tree_nop_conversion_p
		(TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
		 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
	    stmt_cost = 0;
	  else if (fp)
	    stmt_cost = vec_fp_conversion_cost
			  (cost: ix86_tune_cost, GET_MODE_BITSIZE (mode));
	  break;

	case FLOAT_EXPR:
	  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	    stmt_cost = ix86_cost->cvtsi2ss;
	  else if (X87_FLOAT_MODE_P (mode))
	    /* TODO: We do not have cost tables for x87.  */
	    stmt_cost = ix86_cost->fadd;
	  else
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->cvtpi2ps);
	  break;

	case FIX_TRUNC_EXPR:
	  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	    stmt_cost = ix86_cost->cvtss2si;
	  else if (X87_FLOAT_MODE_P (mode))
	    /* TODO: We do not have cost tables for x87.  */
	    stmt_cost = ix86_cost->fadd;
	  else
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->cvtps2pi);
	  break;

	case COND_EXPR:
	  {
	    /* SSE2 conditional move sequence is:
	       pcmpgtd %xmm5, %xmm0 (accounted separately)
	       pand %xmm0, %xmm2
	       pandn %xmm1, %xmm0
	       por %xmm2, %xmm0
	       while SSE4 uses cmp + blend
	       and AVX512 masked moves.

	       The condition is accounted separately since we usually have
	       p = a < b
	       c = p ? x : y
	       and we will account first statement as setcc.  Exception is
	       when p is loaded from memory as bool and then we will not
	       account the compare, but there is no way to check for
	       this.  */

	    int ninsns = TARGET_SSE4_1 ? 1 : 3;

	    /* If one of parameters is 0 or -1 the sequence will be
	       simplified:
	       (if_true & mask) | (if_false & ~mask) -> if_true & mask  */
	    if (ninsns > 1
		&& (zerop (gimple_assign_rhs2 (gs: stmt_info->stmt))
		    || zerop (gimple_assign_rhs3 (gs: stmt_info->stmt))
		    || integer_minus_onep
			 (gimple_assign_rhs2 (gs: stmt_info->stmt))
		    || integer_minus_onep
			 (gimple_assign_rhs3 (gs: stmt_info->stmt))))
	      ninsns = 1;

	    if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	      stmt_cost = ninsns * ix86_cost->sse_op;
	    else if (X87_FLOAT_MODE_P (mode))
	      /* x87 requires conditional branch.  We don't have cost for
		 that.  */
	      ;
	    else if (VECTOR_MODE_P (mode))
	      stmt_cost = ix86_vec_cost (mode, cost: ninsns * ix86_cost->sse_op);
	    else
	      /* compare (accounted separately) + cmov.  */
	      stmt_cost = ix86_cost->add;
	  }
	  break;

	case MIN_EXPR:
	case MAX_EXPR:
	  if (fp)
	    {
	      if (X87_FLOAT_MODE_P (mode)
		  && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
		/* x87 requires conditional branch.  We don't have cost for
		   that.  */
		;
	      else
		/* minss  */
		stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	    }
	  else
	    {
	      if (VECTOR_MODE_P (mode))
		{
		  stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
		  /* vpmin was introduced in SSE3.
		     SSE2 needs pcmpgtd + pand + pandn + pxor.
		     If one of parameters is 0 or -1 the sequence is
		     simplified to pcmpgtd + pand.  */
		  if (!TARGET_SSSE3)
		    {
		      if (zerop (gimple_assign_rhs2 (gs: stmt_info->stmt))
			  || integer_minus_onep
			       (gimple_assign_rhs2 (gs: stmt_info->stmt)))
			stmt_cost *= 2;
		      else
			stmt_cost *= 4;
		    }
		}
	      else
		/* cmp + cmov.  */
		stmt_cost = ix86_cost->add * 2;
	    }
	  break;

	case ABS_EXPR:
	case ABSU_EXPR:
	  if (fp)
	    {
	      if (X87_FLOAT_MODE_P (mode)
		  && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
		/* fabs.  */
		stmt_cost = ix86_cost->fabs;
	      else
		/* andss of sign bit.  */
		stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	    }
	  else
	    {
	      if (VECTOR_MODE_P (mode))
		{
		  stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
		  /* vabs was introduced in SSE3.
		     Without it abs is emulated as a three insn
		     sequence (psra + pxor + psub).  */
		  if (!TARGET_SSSE3)
		    stmt_cost *= 3;
		}
	      else
		/* neg + cmov.  */
		stmt_cost = ix86_cost->add * 2;
	    }
	  break;

	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	case BIT_AND_EXPR:
	case BIT_NOT_EXPR:
	  gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
		      && !X87_FLOAT_MODE_P (mode));
	  if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;

	default:
	  if (truth_value_p (code: subcode))
	    {
	      if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
		/* CMPccS? instructions are cheap, so use sse_op.  While
		   they produce a mask which may need to be turned to 0/1
		   by and, expect that this will be optimized away in a
		   common case.  */
		stmt_cost = ix86_cost->sse_op;
	      else if (X87_FLOAT_MODE_P (mode))
		/* fcmp + setcc.  */
		stmt_cost = ix86_cost->fadd + ix86_cost->add;
	      else if (VECTOR_MODE_P (mode))
		stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	      else
		/* setcc.  */
		stmt_cost = ix86_cost->add;
	      break;
	    }
	  break;
	}
    }

  /* Record number of load/store/gather/scatter in vectorized body.  */
  if (where == vect_body && !m_costing_for_scalar)
    {
      /* When wide vectors are split into two physical registers each
	 statement counts double.  */
      int scale = 1;
      if (vectype
	  && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
	       && TARGET_AVX512_SPLIT_REGS)
	      || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
		  && TARGET_AVX256_SPLIT_REGS)))
	scale = 2;

      switch (kind)
	{
	/* Emulated gather/scatter or any scalarization.  */
	case scalar_load:
	case scalar_stmt:
	case scalar_store:
	case vector_gather_load:
	case vector_scatter_store:
	  m_prefer_unroll = false;
	  break;

	case vector_stmt:
	case vec_to_scalar:
	  /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
	     unroll in the vectorizer will enable partial sum.  */
	  if (stmt_info
	      && vect_is_reduction (stmt_info)
	      && stmt_info->stmt)
	    {
	      /* Handle __builtin_fma.  */
	      if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
		{
		  m_num_reduc[X86_REDUC_FMA] += count * scale;
		  break;
		}

	      if (!is_gimple_assign (gs: stmt_info->stmt))
		break;

	      tree_code subcode = gimple_assign_rhs_code (gs: stmt_info->stmt);
	      machine_mode inner_mode = GET_MODE_INNER (mode);
	      tree rhs1, rhs2;
	      bool native_vnni_p = true;
	      gimple* def;
	      machine_mode mode_rhs;
	      switch (subcode)
		{
		case PLUS_EXPR:
		case MINUS_EXPR:
		  if (!fp || !flag_associative_math
		      || flag_fp_contract_mode != FP_CONTRACT_FAST)
		    break;

		  /* FMA condition for different modes.  */
		  if (((inner_mode == DFmode || inner_mode == SFmode)
		       && !TARGET_FMA && !TARGET_AVX512VL)
		      || (inner_mode == HFmode && !TARGET_AVX512FP16)
		      || (inner_mode == BFmode && !TARGET_AVX10_2))
		    break;

		  /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
		     to FMA/FNMA after vectorization.  */
		  rhs1 = gimple_assign_rhs1 (gs: stmt_info->stmt);
		  rhs2 = gimple_assign_rhs2 (gs: stmt_info->stmt);
		  if (subcode == PLUS_EXPR
		      && TREE_CODE (rhs1) == SSA_NAME
		      && (def = SSA_NAME_DEF_STMT (rhs1), true)
		      && is_gimple_assign (gs: def)
		      && gimple_assign_rhs_code (gs: def) == MULT_EXPR)
		    m_num_reduc[X86_REDUC_FMA] += count * scale;
		  else if (TREE_CODE (rhs2) == SSA_NAME
			   && (def = SSA_NAME_DEF_STMT (rhs2), true)
			   && is_gimple_assign (gs: def)
			   && gimple_assign_rhs_code (gs: def) == MULT_EXPR)
		    m_num_reduc[X86_REDUC_FMA] += count * scale;
		  break;

		  /* Vectorizer lane_reducing_op_p supports DOT_PROD_EXPR,
		     WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
		     SAD_EXPR (usad{v16qi,v32qi,v64qi}) and
		     DOT_PROD_EXPR.  */
		case DOT_PROD_EXPR:
		  rhs1 = gimple_assign_rhs1 (gs: stmt_info->stmt);
		  mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
		  if (mode_rhs == QImode)
		    {
		      rhs2 = gimple_assign_rhs2 (gs: stmt_info->stmt);
		      signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
		      signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));

		      /* vpdpbusd.  */
		      if (signop1_p != signop2_p)
			native_vnni_p
			  = (GET_MODE_SIZE (mode) == 64
			     ? TARGET_AVX512VNNI
			     : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
				|| TARGET_AVXVNNI));
		      else
			/* vpdpbssd.  */
			native_vnni_p
			  = (GET_MODE_SIZE (mode) == 64
			     ? TARGET_AVX10_2
			     : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
		    }
		  m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;

		  /* Dislike to do unroll and partial sum for
		     emulated DOT_PROD_EXPR.  */
		  if (!native_vnni_p)
		    m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
		  break;

		case SAD_EXPR:
		  m_num_reduc[X86_REDUC_SAD] += count * scale;
		  break;

		default:
		  break;
		}
	    }
	  /* FALLTHRU */

	default:
	  break;
	}
    }


  /* Cost a few internal-function calls specially.  */
  combined_fn cfn;
  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt
      && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
    switch (cfn)
      {
      case CFN_FMA:
	stmt_cost = ix86_vec_cost (mode,
				   cost: mode == SFmode ? ix86_cost->fmass
				   : ix86_cost->fmasd);
	break;
      case CFN_MULH:
	stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode);
	break;
      default:
	break;
      }

  if (kind == vec_promote_demote)
    {
      int outer_size
	= tree_to_uhwi
	    (TYPE_SIZE
		(TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
      int inner_size
	= tree_to_uhwi
	    (TYPE_SIZE
		(TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
      bool inner_fp = FLOAT_TYPE_P
			(TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));

      if (fp && inner_fp)
	stmt_cost = vec_fp_conversion_cost
		      (cost: ix86_tune_cost, GET_MODE_BITSIZE (mode));
      else if (fp && !inner_fp)
	stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->cvtpi2ps);
      else if (!fp && inner_fp)
	stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->cvtps2pi);
      else
	stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
      /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
	 greater than inner size we will end up doing two conversions and
	 packing them.  We always pack pairs; if the size difference is
	 greater it is split into multiple demote operations.  */
      if (inner_size > outer_size)
	stmt_cost = stmt_cost * 2
		    + ix86_vec_cost (mode, cost: ix86_cost->sse_op);
    }

  /* If we do elementwise loads into a vector then we are bound by
     latency and execution resources for the many scalar loads
     (AGU and load ports).  Try to account for this by scaling the
     construction cost by the number of elements involved.  */
  if ((kind == vec_construct || kind == vec_to_scalar)
      && ((node
	   && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
		 || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
		     && SLP_TREE_LANES (node) == 1))
		&& (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
					  (SLP_TREE_REPRESENTATIVE (node))))
		    != INTEGER_CST))
	       || mat_gather_scatter_p (mat: SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
    {
      stmt_cost = ix86_default_vector_cost (type_of_cost: kind, mode);
      stmt_cost *= (TYPE_VECTOR_SUBPARTS (node: vectype) + 1);
    }
  else if ((kind == vec_construct || kind == scalar_to_vec)
	   && node
	   && SLP_TREE_DEF_TYPE (node) == vect_external_def)
    {
      stmt_cost = ix86_default_vector_cost (type_of_cost: kind, mode);
      unsigned i;
      tree op;
      /* TREE_VISITED is used as scratch to dedup repeated SSA operands;
	 it is cleared again below.  */
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	if (TREE_CODE (op) == SSA_NAME)
	  TREE_VISITED (op) = 0;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	{
	  if (TREE_CODE (op) != SSA_NAME
	      || TREE_VISITED (op))
	    continue;
	  TREE_VISITED (op) = 1;
	  gimple *def = SSA_NAME_DEF_STMT (op);
	  tree tem;
	  if (is_gimple_assign (gs: def)
	      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
	      && ((tem = gimple_assign_rhs1 (gs: def)), true)
	      && TREE_CODE (tem) == SSA_NAME
	      /* A sign-change expands to nothing.  */
	      && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
					TREE_TYPE (tem)))
	    def = SSA_NAME_DEF_STMT (tem);
	  /* When the component is loaded from memory we can directly
	     move it to a vector register, otherwise we have to go
	     via a GPR or via vpinsr which involves similar cost.
	     Likewise with a BIT_FIELD_REF extracting from a vector
	     register we can hope to avoid using a GPR.  */
	  if (!is_gimple_assign (gs: def)
	      || ((!gimple_assign_load_p (def)
		   || (!TARGET_SSE4_1
		       && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
		  && (gimple_assign_rhs_code (gs: def) != BIT_FIELD_REF
		      || !VECTOR_TYPE_P (TREE_TYPE
				(TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
	    {
	      if (fp)
		{
		  /* Scalar FP values residing in x87 registers need to be
		     spilled and reloaded.  */
		  auto mode2 = TYPE_MODE (TREE_TYPE (op));
		  if (IS_STACK_MODE (mode2))
		    {
		      int cost
			= (ix86_cost->hard_register.fp_store[mode2 == SFmode
							     ? 0 : 1]
			   + ix86_cost->sse_load[sse_store_index (mode: mode2)]);
		      stmt_cost += COSTS_N_INSNS (cost) / 2;
		    }
		  m_num_sse_needed[where]++;
		}
	      else
		{
		  m_num_gpr_needed[where]++;

		  stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
		}
	    }
	}
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	if (TREE_CODE (op) == SSA_NAME)
	  TREE_VISITED (op) = 0;
    }
  if (stmt_cost == -1)
    stmt_cost = ix86_default_vector_cost (type_of_cost: kind, mode);

  /* Count 256-bit permutations that really cross the 128-bit lanes;
     finish_cost may reject the loop on targets that dislike them.  */
  if (kind == vec_perm && vectype
      && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
      /* BIT_FIELD_REF <vect_**, 64, 0> 0 times vec_perm costs 0 in body.  */
      && count != 0)
    {
      bool real_perm = true;
      unsigned nunits = TYPE_VECTOR_SUBPARTS (node: vectype);

      if (node
	  && SLP_TREE_LOAD_PERMUTATION (node).exists ()
	  /* Loop vectorization will have 4 times vec_perm
	     with index as {0, 0, 0, 0}.
	     But it actually generates
	     vec_perm_expr <vect, vect, 0, 0, 0, 0>
	     vec_perm_expr <vect, vect, 1, 1, 1, 1>
	     vec_perm_expr <vect, vect, 2, 2, 2, 2>
	     Need to be handled separately.  */
	  && is_a <bb_vec_info> (p: m_vinfo))
	{
	  unsigned half = nunits / 2;
	  unsigned i = 0;
	  bool allsame = true;
	  unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
	  bool cross_lane_p = false;
	  for (i = 0 ; i != SLP_TREE_LANES (node); i++)
	    {
	      unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
	      /* allsame is just a broadcast.  */
	      if (tmp != first)
		allsame = false;

	      /* 4 times vec_perm with number of lanes multiple of
		 nunits.  */
	      tmp = tmp & (nunits - 1);
	      unsigned index = i & (nunits - 1);
	      if ((index < half && tmp >= half)
		  || (index >= half && tmp < half))
		cross_lane_p = true;

	      if (!allsame && cross_lane_p)
		break;
	    }

	  if (i == SLP_TREE_LANES (node))
	    real_perm = false;
	}

      if (real_perm)
	{
	  m_num_avx256_vec_perm[where] += count;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (stream: dump_file, format: "Detected avx256 cross-lane permutation: ");
	      if (stmt_info)
		print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
	      fprintf (stream: dump_file, format: " \n");
	    }
	}
    }

  /* Penalize DFmode vector operations for Bonnell.  */
  if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
      && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
    stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);

  /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
     for Silvermont as it has out of order integer pipeline and can execute
     2 scalar instruction per tick, but has in order SIMD pipeline.  */
  if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
       || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
      && stmt_info && stmt_info->stmt)
    {
      tree lhs_op = gimple_get_lhs (stmt_info->stmt);
      if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
	retval = (retval * 17) / 10;
    }

  m_costs[where] += retval;

  return retval;
}
| 26521 | |
| 26522 | void |
| 26523 | ix86_vector_costs::ix86_vect_estimate_reg_pressure () |
| 26524 | { |
| 26525 | unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2; |
| 26526 | unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2; |
| 26527 | |
| 26528 | /* Any better way to have target available fp registers, currently use SSE_REGS. */ |
| 26529 | unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8; |
| 26530 | for (unsigned i = 0; i != 3; i++) |
| 26531 | { |
| 26532 | if (m_num_gpr_needed[i] > target_avail_regs) |
| 26533 | m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs); |
| 26534 | /* Only measure sse registers pressure. */ |
| 26535 | if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse)) |
| 26536 | m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse); |
| 26537 | } |
| 26538 | } |
| 26539 | |
/* Implement vector_costs::finish_cost.  Apply the final x86-specific
   adjustments after all statements have been costed: reject loops with
   undesirable properties, suggest an unroll factor for reduction loops,
   add register-pressure penalties, and pick epilogue/masking modes.  */

void
ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
{
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: m_vinfo);
  if (loop_vinfo && !m_costing_for_scalar)
    {
      /* We are currently not asking the vectorizer to compare costs
	 between different vector mode sizes.  When using predication
	 that will end up always choosing the preferred mode size even
	 if there's a smaller mode covering all lanes.  Test for this
	 situation and artificially reject the larger mode attempt.
	 ??? We currently lack masked ops for sub-SSE sized modes,
	 so we could restrict this rejection to AVX and AVX512 modes
	 but error on the safe side for now.  */
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
	  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
	  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
	      > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
	m_costs[vect_body] = INT_MAX;

      /* We'd like to avoid using masking if there's an in-order reduction
	 to vectorize because that will also perform in-order adds of
	 masked elements (as neutral value, of course) here, but there
	 is currently no way to indicate to try un-masked with the same
	 mode.  */

      /* Suggest unrolling for loops that contain reductions: partial
	 sums break the latency chain of the reduction.  */
      bool any_reduc_p = false;
      for (int i = 0; i != X86_REDUC_LAST; i++)
	if (m_num_reduc[i])
	  {
	    any_reduc_p = true;
	    break;
	  }

      if (any_reduc_p
	  /* Not much gain for loop with gather and scatter.  */
	  && m_prefer_unroll
	  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
	{
	  unsigned unroll_factor
	    = OPTION_SET_P (ix86_vect_unroll_limit)
	      ? ix86_vect_unroll_limit
	      : ix86_cost->vect_unroll_limit;

	  if (unroll_factor > 1)
	    {
	      /* Limit the unroll factor by the latency/throughput
		 threshold of the most constrained reduction kind.  */
	      for (int i = 0 ; i != X86_REDUC_LAST; i++)
		{
		  if (m_num_reduc[i])
		    {
		      unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
					   m_num_reduc[i]);
		      unroll_factor = MIN (unroll_factor, tmp);
		    }
		}

	      m_suggested_unroll_factor = 1 << ceil_log2 (x: unroll_factor);
	    }
	}

    }

  ix86_vect_estimate_reg_pressure ();

  /* Reject bodies with real AVX256 cross-lane permutations on targets
     that are slow at them.  */
  for (int i = 0; i != 3; i++)
    if (m_num_avx256_vec_perm[i]
	&& TARGET_AVX256_AVOID_VEC_PERM)
      m_costs[i] = INT_MAX;

  /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
     a AVX2 and a SSE epilogue for AVX512 vectorized loops.  */
  if (loop_vinfo
      && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
      && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
    m_suggested_epilogue_mode = V16QImode;
  /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
     enable a 64bit SSE epilogue.  */
  if (loop_vinfo
      && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
    m_suggested_epilogue_mode = V8QImode;

  /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
     a masked epilogue if that doesn't seem detrimental.  */
  if (loop_vinfo
      && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
      /* Avoid a masked epilog if cascaded epilogues eventually get us
	 to one with VF 1 as that means no scalar epilog at all.  */
      && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
	    / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
	   && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
      && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
      && !OPTION_SET_P (param_vect_partial_vector_usage))
    {
      bool avoid = false;
      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
	{
	  unsigned int peel_niter
	    = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
	  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	    peel_niter += 1;
	  /* When we know the number of scalar iterations of the epilogue,
	     avoid masking when a single vector epilog iteration handles
	     it in full.  */
	  if (pow2p_hwi (x: (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
			 % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
	    avoid = true;
	}
      if (!avoid && loop_outer (loop: loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
	for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
	  {
	    if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	      ;
	    else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	      ;
	    else
	      {
		int loop_depth
		  = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
					DDR_LOOP_NEST (ddr));
		if (DDR_NUM_DIST_VECTS (ddr) == 1
		    && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
		  {
		    /* Avoid the case when there's an outer loop that might
		       traverse a multi-dimensional array with the inner
		       loop just executing the masked epilogue with a
		       read-write where the next outer iteration might
		       read from the masked part of the previous write,
		       'n' filling half a vector.
			 for (j = 0; j < m; ++j)
			   for (i = 0; i < n; ++i)
			     a[j][i] = c * a[j][i];  */
		    avoid = true;
		    break;
		  }
	      }
	  }
      /* Avoid using masking if there's an in-order reduction
	 to vectorize because that will also perform in-order adds of
	 masked elements (as neutral value, of course).  */
      if (!avoid)
	{
	  for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
	    if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
		&& (vect_reduc_type (vinfo: loop_vinfo, SLP_INSTANCE_TREE (inst))
		    == FOLD_LEFT_REDUCTION))
	      {
		avoid = true;
		break;
	      }
	}
      if (!avoid)
	{
	  m_suggested_epilogue_mode = loop_vinfo->vector_mode;
	  m_masked_epilogue = 1;
	}
    }

  vector_costs::finish_cost (scalar_costs);
}
| 26705 | |
| 26706 | /* Validate target specific memory model bits in VAL. */ |
| 26707 | |
| 26708 | static unsigned HOST_WIDE_INT |
| 26709 | ix86_memmodel_check (unsigned HOST_WIDE_INT val) |
| 26710 | { |
| 26711 | enum memmodel model = memmodel_from_int (val); |
| 26712 | bool strong; |
| 26713 | |
| 26714 | if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE |
| 26715 | |MEMMODEL_MASK) |
| 26716 | || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE))) |
| 26717 | { |
| 26718 | warning (OPT_Winvalid_memory_model, |
| 26719 | "unknown architecture specific memory model" ); |
| 26720 | return MEMMODEL_SEQ_CST; |
| 26721 | } |
| 26722 | strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model)); |
| 26723 | if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) |
| 26724 | { |
| 26725 | warning (OPT_Winvalid_memory_model, |
| 26726 | "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger " |
| 26727 | "memory model" ); |
| 26728 | return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; |
| 26729 | } |
| 26730 | if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) |
| 26731 | { |
| 26732 | warning (OPT_Winvalid_memory_model, |
| 26733 | "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger " |
| 26734 | "memory model" ); |
| 26735 | return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE; |
| 26736 | } |
| 26737 | return val; |
| 26738 | } |
| 26739 | |
| 26740 | /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int, |
| 26741 | CLONEI->vecsize_float and if CLONEI->simdlen is 0, also |
| 26742 | CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted, |
| 26743 | or number of vecsize_mangle variants that should be emitted. */ |
| 26744 | |
static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
					     struct cgraph_simd_clone *clonei,
					     tree base_type, int num,
					     bool explicit_p)
{
  int ret = 1;

  /* An explicitly requested simdlen must be a power of two in the
     range [2, 1024]; otherwise no clone is emitted.  */
  if (clonei->simdlen
      && (clonei->simdlen < 2
	  || clonei->simdlen > 1024
	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      if (explicit_p)
	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		    "unsupported simdlen %wd" , clonei->simdlen.to_constant ());
      return 0;
    }

  /* A non-void return value must be a scalar whose mode fits a vector
     element; aggregates are rejected even if they happen to have one
     of the listed modes.  */
  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case E_QImode:
      case E_HImode:
      case E_SImode:
      case E_DImode:
      case E_SFmode:
      case E_DFmode:
      /* case E_SCmode: */
      /* case E_DCmode: */
	if (!AGGREGATE_TYPE_P (ret_type))
	  break;
	/* FALLTHRU */
      default:
	if (explicit_p)
	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		      "unsupported return type %qT for simd" , ret_type);
	return 0;
      }

  tree t;
  int i;
  /* For definitions (or when no prototype is available) walk the
     actual PARM_DECLs, otherwise the prototype's argument type list.  */
  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);

  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
       t && t != void_list_node; t = TREE_CHAIN (t), i++)
    {
      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
      switch (TYPE_MODE (arg_type))
	{
	case E_QImode:
	case E_HImode:
	case E_SImode:
	case E_DImode:
	case E_SFmode:
	case E_DFmode:
	/* case E_SCmode: */
	/* case E_DCmode: */
	  if (!AGGREGATE_TYPE_P (arg_type))
	    break;
	  /* FALLTHRU */
	default:
	  /* Uniform arguments are passed as-is, so any type is
	     acceptable for them.  */
	  if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
	    break;
	  if (explicit_p)
	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
			"unsupported argument type %qT for simd" , arg_type);
	  return 0;
	}
    }

  if (!TREE_PUBLIC (node->decl) || !explicit_p)
    {
      /* If the function isn't exported, we can pick up just one ISA
	 for the clones.  */
      if (TARGET_AVX512F)
	clonei->vecsize_mangle = 'e';
      else if (TARGET_AVX2)
	clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
	clonei->vecsize_mangle = 'c';
      else
	clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      /* Exported functions get all four ISA variants; NUM selects
	 which one this invocation computes.  */
      clonei->vecsize_mangle = "bcde" [num];
      ret = 4;
    }
  clonei->mask_mode = VOIDmode;
  /* Vector sizes per mangle letter: 'b' SSE2, 'c' AVX, 'd' AVX2,
     'e' AVX512F.  */
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    case 'e':
      /* AVX512F clones use mask arguments; QImode elements need the
	 wider DImode mask, everything else fits SImode.  */
      clonei->vecsize_int = 512;
      clonei->vecsize_float = 512;
      if (TYPE_MODE (base_type) == QImode)
	clonei->mask_mode = DImode;
      else
	clonei->mask_mode = SImode;
      break;
    }
  if (clonei->simdlen == 0)
    {
      /* No simdlen given: derive it from the vector size divided by
	 the characteristic type's bit width.  */
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
	clonei->simdlen = clonei->vecsize_int;
      else
	clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen = clonei->simdlen
			/ GET_MODE_BITSIZE (TYPE_MODE (base_type));
    }
  else if (clonei->simdlen > 16)
    {
      /* For compatibility with ICC, use the same upper bounds
	 for simdlen.  In particular, for CTYPE below, use the return type,
	 unless the function returns void, in that case use the characteristic
	 type.  If it is possible for given SIMDLEN to pass CTYPE value
	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
	 emit corresponding clone.  */
      tree ctype = ret_type;
      if (VOID_TYPE_P (ret_type))
	ctype = base_type;
      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
	cnt /= clonei->vecsize_int;
      else
	cnt /= clonei->vecsize_float;
      if (cnt > (TARGET_64BIT ? 16 : 8))
	{
	  if (explicit_p)
	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
			"unsupported simdlen %wd" ,
			clonei->simdlen.to_constant ());
	  return 0;
	}
    }
  return ret;
}
| 26898 | |
| 26899 | /* If SIMD clone NODE can't be used in a vectorized loop |
| 26900 | in current function, return -1, otherwise return a badness of using it |
| 26901 | (0 if it is most desirable from vecsize_mangle point of view, 1 |
| 26902 | slightly less desirable, etc.). */ |
| 26903 | |
| 26904 | static int |
| 26905 | ix86_simd_clone_usable (struct cgraph_node *node, machine_mode) |
| 26906 | { |
| 26907 | switch (node->simdclone->vecsize_mangle) |
| 26908 | { |
| 26909 | case 'b': |
| 26910 | if (!TARGET_SSE2) |
| 26911 | return -1; |
| 26912 | if (!TARGET_AVX) |
| 26913 | return 0; |
| 26914 | return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1; |
| 26915 | case 'c': |
| 26916 | if (!TARGET_AVX) |
| 26917 | return -1; |
| 26918 | return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0; |
| 26919 | case 'd': |
| 26920 | if (!TARGET_AVX2) |
| 26921 | return -1; |
| 26922 | return TARGET_AVX512F ? 1 : 0; |
| 26923 | case 'e': |
| 26924 | if (!TARGET_AVX512F) |
| 26925 | return -1; |
| 26926 | return 0; |
| 26927 | default: |
| 26928 | gcc_unreachable (); |
| 26929 | } |
| 26930 | } |
| 26931 | |
| 26932 | /* This function adjusts the unroll factor based on |
| 26933 | the hardware capabilities. For ex, bdver3 has |
| 26934 | a loop buffer which makes unrolling of smaller |
| 26935 | loops less important. This function decides the |
| 26936 | unroll factor using number of memory references |
| 26937 | (value 32 is used) as a heuristic. */ |
| 26938 | |
| 26939 | static unsigned |
| 26940 | ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop) |
| 26941 | { |
| 26942 | basic_block *bbs; |
| 26943 | rtx_insn *insn; |
| 26944 | unsigned i; |
| 26945 | unsigned mem_count = 0; |
| 26946 | |
| 26947 | /* Unroll small size loop when unroll factor is not explicitly |
| 26948 | specified. */ |
| 26949 | if (ix86_unroll_only_small_loops && !loop->unroll) |
| 26950 | { |
| 26951 | if (loop->ninsns <= ix86_cost->small_unroll_ninsns) |
| 26952 | return MIN (nunroll, ix86_cost->small_unroll_factor); |
| 26953 | else |
| 26954 | return 1; |
| 26955 | } |
| 26956 | |
| 26957 | if (!TARGET_ADJUST_UNROLL) |
| 26958 | return nunroll; |
| 26959 | |
| 26960 | /* Count the number of memory references within the loop body. |
| 26961 | This value determines the unrolling factor for bdver3 and bdver4 |
| 26962 | architectures. */ |
| 26963 | subrtx_iterator::array_type array; |
| 26964 | bbs = get_loop_body (loop); |
| 26965 | for (i = 0; i < loop->num_nodes; i++) |
| 26966 | FOR_BB_INSNS (bbs[i], insn) |
| 26967 | if (NONDEBUG_INSN_P (insn)) |
| 26968 | FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) |
| 26969 | if (const_rtx x = *iter) |
| 26970 | if (MEM_P (x)) |
| 26971 | { |
| 26972 | machine_mode mode = GET_MODE (x); |
| 26973 | unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; |
| 26974 | if (n_words > 4) |
| 26975 | mem_count += 2; |
| 26976 | else |
| 26977 | mem_count += 1; |
| 26978 | } |
| 26979 | free (ptr: bbs); |
| 26980 | |
| 26981 | if (mem_count && mem_count <=32) |
| 26982 | return MIN (nunroll, 32 / mem_count); |
| 26983 | |
| 26984 | return nunroll; |
| 26985 | } |
| 26986 | |
| 26987 | |
| 26988 | /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */ |
| 26989 | |
| 26990 | static bool |
| 26991 | ix86_float_exceptions_rounding_supported_p (void) |
| 26992 | { |
| 26993 | /* For x87 floating point with standard excess precision handling, |
| 26994 | there is no adddf3 pattern (since x87 floating point only has |
| 26995 | XFmode operations) so the default hook implementation gets this |
| 26996 | wrong. */ |
| 26997 | return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH); |
| 26998 | } |
| 26999 | |
| 27000 | /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ |
| 27001 | |
static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Nothing to do without some FP unit whose environment could be
     affected.  */
  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
    return;
  /* Accumulates the exception flags raised inside the atomic region;
     merged from the x87 status word and/or MXCSR and finally passed
     to __atomic_feraiseexcept below.  */
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      /* x87 path: *hold saves the FP environment (fnstenv) and clears
	 the exception flags (fnclex); *clear re-clears them; *update
	 captures the status word (fnstsw) and then restores the saved
	 environment (fldenv).  */
      tree fenv_index_type = build_index_type (size_int (6));
      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = get_ix86_builtin (c: IX86_BUILTIN_FNSTENV);
      tree fldenv = get_ix86_builtin (c: IX86_BUILTIN_FLDENV);
      tree fnstsw = get_ix86_builtin (c: IX86_BUILTIN_FNSTSW);
      tree fnclex = get_ix86_builtin (c: IX86_BUILTIN_FNCLEX);
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      /* Wrap in a TARGET_EXPR so fenv_var is initialized by the
	 fnstenv call before fnclex runs.  */
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
			 NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
		      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
			    fnstsw_call, NULL_TREE, NULL_TREE);
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
				exceptions_var, exceptions_x87,
				NULL_TREE, NULL_TREE);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
			sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
    }
  if (TARGET_SSE && TARGET_SSE_MATH)
    {
      /* SSE path: *hold saves MXCSR (stmxcsr) and loads a modified
	 copy; *clear reloads that modified copy; *update ORs the new
	 MXCSR exception flags into exceptions_var and restores the
	 original MXCSR.  */
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = get_ix86_builtin (c: IX86_BUILTIN_STMXCSR);
      tree ldmxcsr = get_ix86_builtin (c: IX86_BUILTIN_LDMXCSR);
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
				      mxcsr_orig_var, stmxcsr_hold_call,
				      NULL_TREE, NULL_TREE);
      /* 0x1f80 sets the six exception mask bits; 0xffffffc0 clears
	 the sticky exception flags (MXCSR bits 0-5).  */
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
				  mxcsr_orig_var,
				  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
			     build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
				     mxcsr_mod_var, hold_mod_val,
				     NULL_TREE, NULL_TREE);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
			      hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
			 ldmxcsr_hold_call);
      /* Chain after the x87 sequences when both units are enabled.  */
      if (*hold)
	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
	*hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
			 ldmxcsr_clear_call);
      else
	*clear = ldmxcsr_clear_call;
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
					  stxmcsr_update_call);
      if (*update)
	{
	  /* Merge the SSE flags with the x87 flags gathered above.  */
	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
					exceptions_var, exceptions_sse);
	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
					   exceptions_var, exceptions_mod);
	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
			    exceptions_assign);
	}
      else
	*update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
			  exceptions_sse, NULL_TREE, NULL_TREE);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
			ldmxcsr_update_call);
    }
  /* Finally re-raise the accumulated exceptions atomically.  */
  tree atomic_feraiseexcept
    = builtin_decl_implicit (fncode: BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
						    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
		    atomic_feraiseexcept_call);
}
| 27100 | |
| 27101 | #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| 27102 | /* For i386, common symbol is local only for non-PIE binaries. For |
| 27103 | x86-64, common symbol is local only for non-PIE binaries or linker |
| 27104 | supports copy reloc in PIE binaries. */ |
| 27105 | |
| 27106 | static bool |
| 27107 | ix86_binds_local_p (const_tree exp) |
| 27108 | { |
| 27109 | bool direct_extern_access |
| 27110 | = (ix86_direct_extern_access |
| 27111 | && !(VAR_OR_FUNCTION_DECL_P (exp) |
| 27112 | && lookup_attribute (attr_name: "nodirect_extern_access" , |
| 27113 | DECL_ATTRIBUTES (exp)))); |
| 27114 | if (!direct_extern_access) |
| 27115 | ix86_has_no_direct_extern_access = true; |
| 27116 | return default_binds_local_p_3 (exp, flag_shlib != 0, true, |
| 27117 | direct_extern_access, |
| 27118 | (direct_extern_access |
| 27119 | && (!flag_pic |
| 27120 | || (TARGET_64BIT |
| 27121 | && HAVE_LD_PIE_COPYRELOC != 0)))); |
| 27122 | } |
| 27123 | |
| 27124 | /* If flag_pic or ix86_direct_extern_access is false, then neither |
| 27125 | local nor global relocs should be placed in readonly memory. */ |
| 27126 | |
| 27127 | static int |
| 27128 | ix86_reloc_rw_mask (void) |
| 27129 | { |
| 27130 | return (flag_pic || !ix86_direct_extern_access) ? 3 : 0; |
| 27131 | } |
| 27132 | #endif |
| 27133 | |
| 27134 | /* Return true iff ADDR can be used as a symbolic base address. */ |
| 27135 | |
| 27136 | static bool |
| 27137 | symbolic_base_address_p (rtx addr) |
| 27138 | { |
| 27139 | if (SYMBOL_REF_P (addr)) |
| 27140 | return true; |
| 27141 | |
| 27142 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF) |
| 27143 | return true; |
| 27144 | |
| 27145 | return false; |
| 27146 | } |
| 27147 | |
| 27148 | /* Return true iff ADDR can be used as a base address. */ |
| 27149 | |
| 27150 | static bool |
| 27151 | base_address_p (rtx addr) |
| 27152 | { |
| 27153 | if (REG_P (addr)) |
| 27154 | return true; |
| 27155 | |
| 27156 | if (symbolic_base_address_p (addr)) |
| 27157 | return true; |
| 27158 | |
| 27159 | return false; |
| 27160 | } |
| 27161 | |
| 27162 | /* If MEM is in the form of [(base+symbase)+offset], extract the three |
| 27163 | parts of address and set to BASE, SYMBASE and OFFSET, otherwise |
| 27164 | return false. */ |
| 27165 | |
| 27166 | static bool |
| 27167 | (rtx mem, rtx *base, rtx *symbase, rtx *offset) |
| 27168 | { |
| 27169 | rtx addr; |
| 27170 | |
| 27171 | gcc_assert (MEM_P (mem)); |
| 27172 | |
| 27173 | addr = XEXP (mem, 0); |
| 27174 | |
| 27175 | if (GET_CODE (addr) == CONST) |
| 27176 | addr = XEXP (addr, 0); |
| 27177 | |
| 27178 | if (base_address_p (addr)) |
| 27179 | { |
| 27180 | *base = addr; |
| 27181 | *symbase = const0_rtx; |
| 27182 | *offset = const0_rtx; |
| 27183 | return true; |
| 27184 | } |
| 27185 | |
| 27186 | if (GET_CODE (addr) == PLUS |
| 27187 | && base_address_p (XEXP (addr, 0))) |
| 27188 | { |
| 27189 | rtx addend = XEXP (addr, 1); |
| 27190 | |
| 27191 | if (GET_CODE (addend) == CONST) |
| 27192 | addend = XEXP (addend, 0); |
| 27193 | |
| 27194 | if (CONST_INT_P (addend)) |
| 27195 | { |
| 27196 | *base = XEXP (addr, 0); |
| 27197 | *symbase = const0_rtx; |
| 27198 | *offset = addend; |
| 27199 | return true; |
| 27200 | } |
| 27201 | |
| 27202 | /* Also accept REG + symbolic ref, with or without a CONST_INT |
| 27203 | offset. */ |
| 27204 | if (REG_P (XEXP (addr, 0))) |
| 27205 | { |
| 27206 | if (symbolic_base_address_p (addr: addend)) |
| 27207 | { |
| 27208 | *base = XEXP (addr, 0); |
| 27209 | *symbase = addend; |
| 27210 | *offset = const0_rtx; |
| 27211 | return true; |
| 27212 | } |
| 27213 | |
| 27214 | if (GET_CODE (addend) == PLUS |
| 27215 | && symbolic_base_address_p (XEXP (addend, 0)) |
| 27216 | && CONST_INT_P (XEXP (addend, 1))) |
| 27217 | { |
| 27218 | *base = XEXP (addr, 0); |
| 27219 | *symbase = XEXP (addend, 0); |
| 27220 | *offset = XEXP (addend, 1); |
| 27221 | return true; |
| 27222 | } |
| 27223 | } |
| 27224 | } |
| 27225 | |
| 27226 | return false; |
| 27227 | } |
| 27228 | |
| 27229 | /* Given OPERANDS of consecutive load/store, check if we can merge |
| 27230 | them into move multiple. LOAD is true if they are load instructions. |
| 27231 | MODE is the mode of memory operands. */ |
| 27232 | |
| 27233 | bool |
| 27234 | ix86_operands_ok_for_move_multiple (rtx *operands, bool load, |
| 27235 | machine_mode mode) |
| 27236 | { |
| 27237 | HOST_WIDE_INT offval_1, offval_2, msize; |
| 27238 | rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, |
| 27239 | symbase_1, symbase_2, offset_1, offset_2; |
| 27240 | |
| 27241 | if (load) |
| 27242 | { |
| 27243 | mem_1 = operands[1]; |
| 27244 | mem_2 = operands[3]; |
| 27245 | reg_1 = operands[0]; |
| 27246 | reg_2 = operands[2]; |
| 27247 | } |
| 27248 | else |
| 27249 | { |
| 27250 | mem_1 = operands[0]; |
| 27251 | mem_2 = operands[2]; |
| 27252 | reg_1 = operands[1]; |
| 27253 | reg_2 = operands[3]; |
| 27254 | } |
| 27255 | |
| 27256 | gcc_assert (REG_P (reg_1) && REG_P (reg_2)); |
| 27257 | |
| 27258 | if (REGNO (reg_1) != REGNO (reg_2)) |
| 27259 | return false; |
| 27260 | |
| 27261 | /* Check if the addresses are in the form of [base+offset]. */ |
| 27262 | if (!extract_base_offset_in_addr (mem: mem_1, base: &base_1, symbase: &symbase_1, offset: &offset_1)) |
| 27263 | return false; |
| 27264 | if (!extract_base_offset_in_addr (mem: mem_2, base: &base_2, symbase: &symbase_2, offset: &offset_2)) |
| 27265 | return false; |
| 27266 | |
| 27267 | /* Check if the bases are the same. */ |
| 27268 | if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2)) |
| 27269 | return false; |
| 27270 | |
| 27271 | offval_1 = INTVAL (offset_1); |
| 27272 | offval_2 = INTVAL (offset_2); |
| 27273 | msize = GET_MODE_SIZE (mode); |
| 27274 | /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */ |
| 27275 | if (offval_1 + msize != offval_2) |
| 27276 | return false; |
| 27277 | |
| 27278 | return true; |
| 27279 | } |
| 27280 | |
| 27281 | /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ |
| 27282 | |
| 27283 | static bool |
| 27284 | ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, |
| 27285 | optimization_type opt_type) |
| 27286 | { |
| 27287 | switch (op) |
| 27288 | { |
| 27289 | case asin_optab: |
| 27290 | case acos_optab: |
| 27291 | case log1p_optab: |
| 27292 | case exp_optab: |
| 27293 | case exp10_optab: |
| 27294 | case exp2_optab: |
| 27295 | case expm1_optab: |
| 27296 | case ldexp_optab: |
| 27297 | case scalb_optab: |
| 27298 | case round_optab: |
| 27299 | case lround_optab: |
| 27300 | return opt_type == OPTIMIZE_FOR_SPEED; |
| 27301 | |
| 27302 | case rint_optab: |
| 27303 | if (SSE_FLOAT_MODE_P (mode1) |
| 27304 | && TARGET_SSE_MATH |
| 27305 | && !flag_trapping_math |
| 27306 | && !TARGET_SSE4_1 |
| 27307 | && mode1 != HFmode) |
| 27308 | return opt_type == OPTIMIZE_FOR_SPEED; |
| 27309 | return true; |
| 27310 | |
| 27311 | case floor_optab: |
| 27312 | case ceil_optab: |
| 27313 | case btrunc_optab: |
| 27314 | if ((SSE_FLOAT_MODE_P (mode1) |
| 27315 | && TARGET_SSE_MATH |
| 27316 | && TARGET_SSE4_1) |
| 27317 | || mode1 == HFmode) |
| 27318 | return true; |
| 27319 | return opt_type == OPTIMIZE_FOR_SPEED; |
| 27320 | |
| 27321 | case rsqrt_optab: |
| 27322 | return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode: mode1); |
| 27323 | |
| 27324 | default: |
| 27325 | return true; |
| 27326 | } |
| 27327 | } |
| 27328 | |
| 27329 | /* Address space support. |
| 27330 | |
| 27331 | This is not "far pointers" in the 16-bit sense, but an easy way |
| 27332 | to use %fs and %gs segment prefixes. Therefore: |
| 27333 | |
| 27334 | (a) All address spaces have the same modes, |
   (b) All address spaces have the same address forms,
| 27336 | (c) While %fs and %gs are technically subsets of the generic |
| 27337 | address space, they are probably not subsets of each other. |
| 27338 | (d) Since we have no access to the segment base register values |
| 27339 | without resorting to a system call, we cannot convert a |
| 27340 | non-default address space to a default address space. |
| 27341 | Therefore we do not claim %fs or %gs are subsets of generic. |
| 27342 | |
| 27343 | Therefore we can (mostly) use the default hooks. */ |
| 27344 | |
| 27345 | /* All use of segmentation is assumed to make address 0 valid. */ |
| 27346 | |
| 27347 | static bool |
| 27348 | ix86_addr_space_zero_address_valid (addr_space_t as) |
| 27349 | { |
| 27350 | return as != ADDR_SPACE_GENERIC; |
| 27351 | } |
| 27352 | |
| 27353 | static void |
| 27354 | ix86_init_libfuncs (void) |
| 27355 | { |
| 27356 | if (TARGET_64BIT) |
| 27357 | { |
| 27358 | set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4" ); |
| 27359 | set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4" ); |
| 27360 | } |
| 27361 | else |
| 27362 | { |
| 27363 | set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4" ); |
| 27364 | set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4" ); |
| 27365 | } |
| 27366 | |
| 27367 | #if TARGET_MACHO |
| 27368 | darwin_rename_builtins (); |
| 27369 | #endif |
| 27370 | } |
| 27371 | |
| 27372 | /* Set the value of FLT_EVAL_METHOD in float.h. When using only the |
| 27373 | FPU, assume that the fpcw is set to extended precision; when using |
| 27374 | only SSE, rounding is correct; when using both SSE and the FPU, |
| 27375 | the rounding precision is indeterminate, since either may be chosen |
| 27376 | apparently at random. */ |
| 27377 | |
static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return TARGET_AVX512FP16
	     ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	     : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
	 in a standards compliant mode, and the implicit precision we
	 provide would be identical were it not for the unpredictable
	 cases.  */
      if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      else if (!TARGET_80387)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
	{
	  /* Exactly one FP unit is selected: x87 alone evaluates in
	     extended precision, SSE2 alone in nominal precision.  If
	     neither branch returns here (i.e. SSE without SSE2), fall
	     through to the unpredictable handling below.  */
	  if (!(TARGET_SSE && TARGET_SSE_MATH))
	    return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	  else if (TARGET_SSE2)
	    return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	}

      /* If we are in standards compliant mode, but we know we will
	 calculate in unpredictable precision, return
	 FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	 excess precision if the target can't guarantee it will honor
	 it.  */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
	      : FLT_EVAL_METHOD_UNPREDICTABLE);
    case EXCESS_PRECISION_TYPE_FLOAT16:
      /* 16-bit excess precision needs SSE math; x87 cannot honor it.  */
      if (TARGET_80387
	  && !(TARGET_SSE_MATH && TARGET_SSE))
	error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }

  /* Not reached: every case above returns or aborts.  */
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
| 27427 | |
| 27428 | /* Return true if _BitInt(N) is supported and fill its details into *INFO. */ |
| 27429 | bool |
| 27430 | ix86_bitint_type_info (int n, struct bitint_info *info) |
| 27431 | { |
| 27432 | if (n <= 8) |
| 27433 | info->limb_mode = QImode; |
| 27434 | else if (n <= 16) |
| 27435 | info->limb_mode = HImode; |
| 27436 | else if (n <= 32 || (!TARGET_64BIT && n > 64)) |
| 27437 | info->limb_mode = SImode; |
| 27438 | else |
| 27439 | info->limb_mode = DImode; |
| 27440 | info->abi_limb_mode = info->limb_mode; |
| 27441 | info->big_endian = false; |
| 27442 | info->extended = false; |
| 27443 | return true; |
| 27444 | } |
| 27445 | |
| 27446 | /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, TFmode |
| 27447 | or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type, |
| 27448 | based on long double bits, go with the default one for the others. */ |
| 27449 | |
| 27450 | static machine_mode |
| 27451 | ix86_c_mode_for_floating_type (enum tree_index ti) |
| 27452 | { |
| 27453 | if (ti == TI_LONG_DOUBLE_TYPE) |
| 27454 | return (TARGET_LONG_DOUBLE_64 ? DFmode |
| 27455 | : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode)); |
| 27456 | return default_mode_for_floating_type (ti); |
| 27457 | } |
| 27458 | |
| 27459 | /* Returns modified FUNCTION_TYPE for cdtor callabi. */ |
| 27460 | tree |
| 27461 | ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype) |
| 27462 | { |
| 27463 | if (TARGET_64BIT |
| 27464 | || TARGET_RTD |
| 27465 | || ix86_function_type_abi (fntype) != MS_ABI) |
| 27466 | return fntype; |
| 27467 | /* For 32-bit MS ABI add thiscall attribute. */ |
| 27468 | tree attribs = tree_cons (get_identifier ("thiscall" ), NULL_TREE, |
| 27469 | TYPE_ATTRIBUTES (fntype)); |
| 27470 | return build_type_attribute_variant (fntype, attribs); |
| 27471 | } |
| 27472 | |
| 27473 | /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that |
| 27474 | decrements by exactly 2 no matter what the position was, there is no pushb. |
| 27475 | |
| 27476 | But as CIE data alignment factor on this arch is -4 for 32bit targets |
| 27477 | and -8 for 64bit targets, we need to make sure all stack pointer adjustments |
| 27478 | are in multiple of 4 for 32bit targets and 8 for 64bit targets. */ |
| 27479 | |
poly_int64
ix86_push_rounding (poly_int64 bytes)
{
  /* Round every push up to the stack word size so all stack pointer
     adjustments stay multiples of the CIE data alignment factor
     (4 bytes for 32-bit targets, 8 for 64-bit).  */
  return ROUND_UP (bytes, UNITS_PER_WORD);
}
| 27485 | |
/* Use 8 bits of metadata starting from bit 48 for LAM_U48,
   6 bits of metadata starting from bit 57 for LAM_U57.  */
| 27488 | #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \ |
| 27489 | ? 48 \ |
| 27490 | : (ix86_lam_type == lam_u57 ? 57 : 0)) |
| 27491 | #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \ |
| 27492 | ? 8 \ |
| 27493 | : (ix86_lam_type == lam_u57 ? 6 : 0)) |
| 27494 | |
| 27495 | /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */ |
| 27496 | bool |
| 27497 | ix86_memtag_can_tag_addresses () |
| 27498 | { |
| 27499 | return ix86_lam_type != lam_none && TARGET_LP64; |
| 27500 | } |
| 27501 | |
| 27502 | /* Implement TARGET_MEMTAG_TAG_BITSIZE. */ |
unsigned char
ix86_memtag_tag_bitsize ()
{
  /* 8 bits for LAM_U48, 6 bits for LAM_U57, 0 when LAM is off (see
     the IX86_HWASAN_TAG_SIZE definition above).  */
  return IX86_HWASAN_TAG_SIZE;
}
| 27508 | |
| 27509 | /* Implement TARGET_MEMTAG_SET_TAG. */ |
| 27510 | rtx |
| 27511 | ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target) |
| 27512 | { |
| 27513 | /* default_memtag_insert_random_tag may |
| 27514 | generate tag with value more than 6 bits. */ |
| 27515 | if (ix86_lam_type == lam_u57) |
| 27516 | { |
| 27517 | unsigned HOST_WIDE_INT and_imm |
| 27518 | = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1; |
| 27519 | |
| 27520 | emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm))); |
| 27521 | } |
| 27522 | tag = expand_simple_binop (Pmode, ASHIFT, tag, |
| 27523 | GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX, |
| 27524 | /* unsignedp = */1, OPTAB_WIDEN); |
| 27525 | rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target, |
| 27526 | /* unsignedp = */1, OPTAB_DIRECT); |
| 27527 | return ret; |
| 27528 | } |
| 27529 | |
| 27530 | /* Implement TARGET_MEMTAG_EXTRACT_TAG. */ |
| 27531 | rtx |
| 27532 | (rtx tagged_pointer, rtx target) |
| 27533 | { |
| 27534 | rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer, |
| 27535 | GEN_INT (IX86_HWASAN_SHIFT), target, |
| 27536 | /* unsignedp = */0, |
| 27537 | OPTAB_DIRECT); |
| 27538 | rtx ret = gen_reg_rtx (QImode); |
| 27539 | /* Mask off bit63 when LAM_U57. */ |
| 27540 | if (ix86_lam_type == lam_u57) |
| 27541 | { |
| 27542 | unsigned HOST_WIDE_INT and_imm |
| 27543 | = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1; |
| 27544 | emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag), |
| 27545 | gen_int_mode (and_imm, QImode))); |
| 27546 | } |
| 27547 | else |
| 27548 | emit_move_insn (ret, gen_lowpart (QImode, tag)); |
| 27549 | return ret; |
| 27550 | } |
| 27551 | |
| 27552 | /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */ |
| 27553 | rtx |
| 27554 | ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target) |
| 27555 | { |
| 27556 | /* Leave bit63 alone. */ |
| 27557 | rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) |
| 27558 | + (HOST_WIDE_INT_1U << 63) - 1), |
| 27559 | Pmode); |
| 27560 | rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer, |
| 27561 | tag_mask, target, true, |
| 27562 | OPTAB_DIRECT); |
| 27563 | gcc_assert (untagged_base); |
| 27564 | return untagged_base; |
| 27565 | } |
| 27566 | |
| 27567 | /* Implement TARGET_MEMTAG_ADD_TAG. */ |
| 27568 | rtx |
| 27569 | ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset) |
| 27570 | { |
| 27571 | rtx base_tag = gen_reg_rtx (QImode); |
| 27572 | rtx base_addr = gen_reg_rtx (Pmode); |
| 27573 | rtx tagged_addr = gen_reg_rtx (Pmode); |
| 27574 | rtx new_tag = gen_reg_rtx (QImode); |
| 27575 | unsigned HOST_WIDE_INT and_imm |
| 27576 | = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1; |
| 27577 | |
| 27578 | /* When there's "overflow" in tag adding, |
| 27579 | need to mask the most significant bit off. */ |
| 27580 | emit_move_insn (base_tag, ix86_memtag_extract_tag (tagged_pointer: base, NULL_RTX)); |
| 27581 | emit_move_insn (base_addr, |
| 27582 | ix86_memtag_untagged_pointer (tagged_pointer: base, NULL_RTX)); |
| 27583 | emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode))); |
| 27584 | emit_move_insn (new_tag, base_tag); |
| 27585 | emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode))); |
| 27586 | emit_move_insn (tagged_addr, |
| 27587 | ix86_memtag_set_tag (untagged: base_addr, tag: new_tag, NULL_RTX)); |
| 27588 | return plus_constant (Pmode, tagged_addr, offset); |
| 27589 | } |
| 27590 | |
| 27591 | /* Implement TARGET_HAVE_CCMP. */ |
| 27592 | static bool |
| 27593 | ix86_have_ccmp () |
| 27594 | { |
| 27595 | return (bool) TARGET_APX_CCMP; |
| 27596 | } |
| 27597 | |
| 27598 | /* Implement TARGET_MODE_CAN_TRANSFER_BITS. */ |
| 27599 | static bool |
| 27600 | ix86_mode_can_transfer_bits (machine_mode mode) |
| 27601 | { |
| 27602 | if (GET_MODE_CLASS (mode) == MODE_FLOAT |
| 27603 | || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) |
| 27604 | switch (GET_MODE_INNER (mode)) |
| 27605 | { |
| 27606 | case E_SFmode: |
| 27607 | case E_DFmode: |
| 27608 | /* These suffer from normalization upon load when not using SSE. */ |
| 27609 | return !(ix86_fpmath & FPMATH_387); |
| 27610 | default: |
| 27611 | return true; |
| 27612 | } |
| 27613 | |
| 27614 | return true; |
| 27615 | } |
| 27616 | |
| 27617 | /* Implement TARGET_REDZONE_CLOBBER. */ |
| 27618 | static rtx |
| 27619 | ix86_redzone_clobber () |
| 27620 | { |
| 27621 | cfun->machine->asm_redzone_clobber_seen = true; |
| 27622 | if (ix86_using_red_zone ()) |
| 27623 | { |
| 27624 | rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE); |
| 27625 | rtx mem = gen_rtx_MEM (BLKmode, base); |
| 27626 | set_mem_size (mem, RED_ZONE_SIZE); |
| 27627 | return mem; |
| 27628 | } |
| 27629 | return NULL_RTX; |
| 27630 | } |
| 27631 | |
| 27632 | /* Target-specific selftests. */ |
| 27633 | |
| 27634 | #if CHECKING_P |
| 27635 | |
| 27636 | namespace selftest { |
| 27637 | |
| 27638 | /* Verify that hard regs are dumped as expected (in compact mode). */ |
| 27639 | |
| 27640 | static void |
| 27641 | ix86_test_dumping_hard_regs () |
| 27642 | { |
| 27643 | ASSERT_RTL_DUMP_EQ ("(reg:SI ax)" , gen_raw_REG (SImode, 0)); |
| 27644 | ASSERT_RTL_DUMP_EQ ("(reg:SI dx)" , gen_raw_REG (SImode, 1)); |
| 27645 | } |
| 27646 | |
| 27647 | /* Test dumping an insn with repeated references to the same SCRATCH, |
| 27648 | to verify the rtx_reuse code. */ |
| 27649 | |
| 27650 | static void |
| 27651 | ix86_test_dumping_memory_blockage () |
| 27652 | { |
| 27653 | set_new_first_and_last_insn (NULL, NULL); |
| 27654 | |
| 27655 | rtx pat = gen_memory_blockage (); |
| 27656 | rtx_reuse_manager r; |
| 27657 | r.preprocess (x: pat); |
| 27658 | |
| 27659 | /* Verify that the repeated references to the SCRATCH show use |
| 27660 | reuse IDS. The first should be prefixed with a reuse ID, |
| 27661 | and the second should be dumped as a "reuse_rtx" of that ID. |
| 27662 | The expected string assumes Pmode == DImode. */ |
| 27663 | if (Pmode == DImode) |
| 27664 | ASSERT_RTL_DUMP_EQ_WITH_REUSE |
| 27665 | ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n" |
| 27666 | " (unspec:BLK [\n" |
| 27667 | " (mem/v:BLK (reuse_rtx 0) [0 A8])\n" |
| 27668 | " ] UNSPEC_MEMORY_BLOCKAGE)))\n" , pat, &r); |
| 27669 | } |
| 27670 | |
| 27671 | /* Verify loading an RTL dump; specifically a dump of copying |
| 27672 | a param on x86_64 from a hard reg into the frame. |
| 27673 | This test is target-specific since the dump contains target-specific |
| 27674 | hard reg names. */ |
| 27675 | |
| 27676 | static void |
| 27677 | ix86_test_loading_dump_fragment_1 () |
| 27678 | { |
| 27679 | rtl_dump_test t (SELFTEST_LOCATION, |
| 27680 | locate_file (path: "x86_64/copy-hard-reg-into-frame.rtl" )); |
| 27681 | |
| 27682 | rtx_insn *insn = get_insn_by_uid (uid: 1); |
| 27683 | |
| 27684 | /* The block structure and indentation here is purely for |
| 27685 | readability; it mirrors the structure of the rtx. */ |
| 27686 | tree mem_expr; |
| 27687 | { |
| 27688 | rtx pat = PATTERN (insn); |
| 27689 | ASSERT_EQ (SET, GET_CODE (pat)); |
| 27690 | { |
| 27691 | rtx dest = SET_DEST (pat); |
| 27692 | ASSERT_EQ (MEM, GET_CODE (dest)); |
| 27693 | /* Verify the "/c" was parsed. */ |
| 27694 | ASSERT_TRUE (RTX_FLAG (dest, call)); |
| 27695 | ASSERT_EQ (SImode, GET_MODE (dest)); |
| 27696 | { |
| 27697 | rtx addr = XEXP (dest, 0); |
| 27698 | ASSERT_EQ (PLUS, GET_CODE (addr)); |
| 27699 | ASSERT_EQ (DImode, GET_MODE (addr)); |
| 27700 | { |
| 27701 | rtx lhs = XEXP (addr, 0); |
| 27702 | /* Verify that the "frame" REG was consolidated. */ |
| 27703 | ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs); |
| 27704 | } |
| 27705 | { |
| 27706 | rtx rhs = XEXP (addr, 1); |
| 27707 | ASSERT_EQ (CONST_INT, GET_CODE (rhs)); |
| 27708 | ASSERT_EQ (-4, INTVAL (rhs)); |
| 27709 | } |
| 27710 | } |
| 27711 | /* Verify the "[1 i+0 S4 A32]" was parsed. */ |
| 27712 | ASSERT_EQ (1, MEM_ALIAS_SET (dest)); |
| 27713 | /* "i" should have been handled by synthesizing a global int |
| 27714 | variable named "i". */ |
| 27715 | mem_expr = MEM_EXPR (dest); |
| 27716 | ASSERT_NE (mem_expr, NULL); |
| 27717 | ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr)); |
| 27718 | ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr)); |
| 27719 | ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr))); |
| 27720 | ASSERT_STREQ ("i" , IDENTIFIER_POINTER (DECL_NAME (mem_expr))); |
| 27721 | /* "+0". */ |
| 27722 | ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest)); |
| 27723 | ASSERT_EQ (0, MEM_OFFSET (dest)); |
| 27724 | /* "S4". */ |
| 27725 | ASSERT_EQ (4, MEM_SIZE (dest)); |
| 27726 | /* "A32. */ |
| 27727 | ASSERT_EQ (32, MEM_ALIGN (dest)); |
| 27728 | } |
| 27729 | { |
| 27730 | rtx src = SET_SRC (pat); |
| 27731 | ASSERT_EQ (REG, GET_CODE (src)); |
| 27732 | ASSERT_EQ (SImode, GET_MODE (src)); |
| 27733 | ASSERT_EQ (5, REGNO (src)); |
| 27734 | tree reg_expr = REG_EXPR (src); |
| 27735 | /* "i" here should point to the same var as for the MEM_EXPR. */ |
| 27736 | ASSERT_EQ (reg_expr, mem_expr); |
| 27737 | } |
| 27738 | } |
| 27739 | } |
| 27740 | |
| 27741 | /* Verify that the RTL loader copes with a call_insn dump. |
| 27742 | This test is target-specific since the dump contains a target-specific |
| 27743 | hard reg name. */ |
| 27744 | |
| 27745 | static void |
| 27746 | ix86_test_loading_call_insn () |
| 27747 | { |
| 27748 | /* The test dump includes register "xmm0", where requires TARGET_SSE |
| 27749 | to exist. */ |
| 27750 | if (!TARGET_SSE) |
| 27751 | return; |
| 27752 | |
| 27753 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/call-insn.rtl" )); |
| 27754 | |
| 27755 | rtx_insn *insn = get_insns (); |
| 27756 | ASSERT_EQ (CALL_INSN, GET_CODE (insn)); |
| 27757 | |
| 27758 | /* "/j". */ |
| 27759 | ASSERT_TRUE (RTX_FLAG (insn, jump)); |
| 27760 | |
| 27761 | rtx pat = PATTERN (insn); |
| 27762 | ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat))); |
| 27763 | |
| 27764 | /* Verify REG_NOTES. */ |
| 27765 | { |
| 27766 | /* "(expr_list:REG_CALL_DECL". */ |
| 27767 | ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn))); |
| 27768 | rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn)); |
| 27769 | ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0)); |
| 27770 | |
| 27771 | /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */ |
| 27772 | rtx_expr_list *note1 = note0->next (); |
| 27773 | ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1)); |
| 27774 | |
| 27775 | ASSERT_EQ (NULL, note1->next ()); |
| 27776 | } |
| 27777 | |
| 27778 | /* Verify CALL_INSN_FUNCTION_USAGE. */ |
| 27779 | { |
| 27780 | /* "(expr_list:DF (use (reg:DF 21 xmm0))". */ |
| 27781 | rtx_expr_list *usage |
| 27782 | = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn)); |
| 27783 | ASSERT_EQ (EXPR_LIST, GET_CODE (usage)); |
| 27784 | ASSERT_EQ (DFmode, GET_MODE (usage)); |
| 27785 | ASSERT_EQ (USE, GET_CODE (usage->element ())); |
| 27786 | ASSERT_EQ (NULL, usage->next ()); |
| 27787 | } |
| 27788 | } |
| 27789 | |
| 27790 | /* Verify that the RTL loader copes a dump from print_rtx_function. |
| 27791 | This test is target-specific since the dump contains target-specific |
| 27792 | hard reg names. */ |
| 27793 | |
| 27794 | static void |
| 27795 | ix86_test_loading_full_dump () |
| 27796 | { |
| 27797 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/times-two.rtl" )); |
| 27798 | |
| 27799 | ASSERT_STREQ ("times_two" , IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); |
| 27800 | |
| 27801 | rtx_insn *insn_1 = get_insn_by_uid (uid: 1); |
| 27802 | ASSERT_EQ (NOTE, GET_CODE (insn_1)); |
| 27803 | |
| 27804 | rtx_insn *insn_7 = get_insn_by_uid (uid: 7); |
| 27805 | ASSERT_EQ (INSN, GET_CODE (insn_7)); |
| 27806 | ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7))); |
| 27807 | |
| 27808 | rtx_insn *insn_15 = get_insn_by_uid (uid: 15); |
| 27809 | ASSERT_EQ (INSN, GET_CODE (insn_15)); |
| 27810 | ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15))); |
| 27811 | |
| 27812 | /* Verify crtl->return_rtx. */ |
| 27813 | ASSERT_EQ (REG, GET_CODE (crtl->return_rtx)); |
| 27814 | ASSERT_EQ (0, REGNO (crtl->return_rtx)); |
| 27815 | ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx)); |
| 27816 | } |
| 27817 | |
| 27818 | /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns. |
| 27819 | In particular, verify that it correctly loads the 2nd operand. |
| 27820 | This test is target-specific since these are machine-specific |
| 27821 | operands (and enums). */ |
| 27822 | |
| 27823 | static void |
| 27824 | ix86_test_loading_unspec () |
| 27825 | { |
| 27826 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/unspec.rtl" )); |
| 27827 | |
| 27828 | ASSERT_STREQ ("test_unspec" , IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); |
| 27829 | |
| 27830 | ASSERT_TRUE (cfun); |
| 27831 | |
| 27832 | /* Test of an UNSPEC. */ |
| 27833 | rtx_insn *insn = get_insns (); |
| 27834 | ASSERT_EQ (INSN, GET_CODE (insn)); |
| 27835 | rtx set = single_set (insn); |
| 27836 | ASSERT_NE (NULL, set); |
| 27837 | rtx dst = SET_DEST (set); |
| 27838 | ASSERT_EQ (MEM, GET_CODE (dst)); |
| 27839 | rtx src = SET_SRC (set); |
| 27840 | ASSERT_EQ (UNSPEC, GET_CODE (src)); |
| 27841 | ASSERT_EQ (BLKmode, GET_MODE (src)); |
| 27842 | ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1)); |
| 27843 | |
| 27844 | rtx v0 = XVECEXP (src, 0, 0); |
| 27845 | |
| 27846 | /* Verify that the two uses of the first SCRATCH have pointer |
| 27847 | equality. */ |
| 27848 | rtx scratch_a = XEXP (dst, 0); |
| 27849 | ASSERT_EQ (SCRATCH, GET_CODE (scratch_a)); |
| 27850 | |
| 27851 | rtx scratch_b = XEXP (v0, 0); |
| 27852 | ASSERT_EQ (SCRATCH, GET_CODE (scratch_b)); |
| 27853 | |
| 27854 | ASSERT_EQ (scratch_a, scratch_b); |
| 27855 | |
| 27856 | /* Verify that the two mems are thus treated as equal. */ |
| 27857 | ASSERT_TRUE (rtx_equal_p (dst, v0)); |
| 27858 | |
| 27859 | /* Verify that the insn is recognized. */ |
| 27860 | ASSERT_NE(-1, recog_memoized (insn)); |
| 27861 | |
| 27862 | /* Test of an UNSPEC_VOLATILE, which has its own enum values. */ |
| 27863 | insn = NEXT_INSN (insn); |
| 27864 | ASSERT_EQ (INSN, GET_CODE (insn)); |
| 27865 | |
| 27866 | set = single_set (insn); |
| 27867 | ASSERT_NE (NULL, set); |
| 27868 | |
| 27869 | src = SET_SRC (set); |
| 27870 | ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src)); |
| 27871 | ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1)); |
| 27872 | } |
| 27873 | |
/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  /* Tests of RTL dumping, using target-specific hard regs.  */
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}
| 27889 | |
| 27890 | } // namespace selftest |
| 27891 | |
| 27892 | #endif /* CHECKING_P */ |
| 27893 | |
/* Table of attribute spec tables for this target, used for
   TARGET_ATTRIBUTE_TABLE below; currently just the GNU-style
   attribute specs.  */
static const scoped_attribute_specs *const ix86_attribute_table[] =
{
  &ix86_gnu_attribute_table
};
| 27898 | |
| 27899 | /* Initialize the GCC target structure. */ |
| 27900 | #undef TARGET_RETURN_IN_MEMORY |
| 27901 | #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory |
| 27902 | |
| 27903 | #undef TARGET_LEGITIMIZE_ADDRESS |
| 27904 | #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address |
| 27905 | |
| 27906 | #undef TARGET_ATTRIBUTE_TABLE |
| 27907 | #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table |
| 27908 | #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P |
| 27909 | #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true |
| 27910 | #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| 27911 | # undef TARGET_MERGE_DECL_ATTRIBUTES |
| 27912 | # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes |
| 27913 | #endif |
| 27914 | |
| 27915 | #undef TARGET_INVALID_CONVERSION |
| 27916 | #define TARGET_INVALID_CONVERSION ix86_invalid_conversion |
| 27917 | |
| 27918 | #undef TARGET_INVALID_UNARY_OP |
| 27919 | #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op |
| 27920 | |
| 27921 | #undef TARGET_INVALID_BINARY_OP |
| 27922 | #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op |
| 27923 | |
| 27924 | #undef TARGET_COMP_TYPE_ATTRIBUTES |
| 27925 | #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes |
| 27926 | |
| 27927 | #undef TARGET_INIT_BUILTINS |
| 27928 | #define TARGET_INIT_BUILTINS ix86_init_builtins |
| 27929 | #undef TARGET_BUILTIN_DECL |
| 27930 | #define TARGET_BUILTIN_DECL ix86_builtin_decl |
| 27931 | #undef TARGET_EXPAND_BUILTIN |
| 27932 | #define TARGET_EXPAND_BUILTIN ix86_expand_builtin |
| 27933 | |
| 27934 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION |
| 27935 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ |
| 27936 | ix86_builtin_vectorized_function |
| 27937 | |
| 27938 | #undef TARGET_VECTORIZE_BUILTIN_GATHER |
| 27939 | #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather |
| 27940 | |
| 27941 | #undef TARGET_VECTORIZE_BUILTIN_SCATTER |
| 27942 | #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter |
| 27943 | |
| 27944 | #undef TARGET_BUILTIN_RECIPROCAL |
| 27945 | #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal |
| 27946 | |
| 27947 | #undef TARGET_ASM_FUNCTION_EPILOGUE |
| 27948 | #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue |
| 27949 | |
| 27950 | #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY |
| 27951 | #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \ |
| 27952 | ix86_print_patchable_function_entry |
| 27953 | |
| 27954 | #undef TARGET_ENCODE_SECTION_INFO |
| 27955 | #ifndef SUBTARGET_ENCODE_SECTION_INFO |
| 27956 | #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info |
| 27957 | #else |
| 27958 | #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO |
| 27959 | #endif |
| 27960 | |
| 27961 | #undef TARGET_ASM_OPEN_PAREN |
| 27962 | #define TARGET_ASM_OPEN_PAREN "" |
| 27963 | #undef TARGET_ASM_CLOSE_PAREN |
| 27964 | #define TARGET_ASM_CLOSE_PAREN "" |
| 27965 | |
| 27966 | #undef TARGET_ASM_BYTE_OP |
| 27967 | #define TARGET_ASM_BYTE_OP ASM_BYTE |
| 27968 | |
| 27969 | #undef TARGET_ASM_ALIGNED_HI_OP |
| 27970 | #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT |
| 27971 | #undef TARGET_ASM_ALIGNED_SI_OP |
| 27972 | #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG |
| 27973 | #ifdef ASM_QUAD |
| 27974 | #undef TARGET_ASM_ALIGNED_DI_OP |
| 27975 | #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD |
| 27976 | #endif |
| 27977 | |
| 27978 | #undef TARGET_PROFILE_BEFORE_PROLOGUE |
| 27979 | #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue |
| 27980 | |
| 27981 | #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME |
| 27982 | #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name |
| 27983 | |
| 27984 | #undef TARGET_ASM_UNALIGNED_HI_OP |
| 27985 | #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP |
| 27986 | #undef TARGET_ASM_UNALIGNED_SI_OP |
| 27987 | #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP |
| 27988 | #undef TARGET_ASM_UNALIGNED_DI_OP |
| 27989 | #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP |
| 27990 | |
| 27991 | #undef TARGET_PRINT_OPERAND |
| 27992 | #define TARGET_PRINT_OPERAND ix86_print_operand |
| 27993 | #undef TARGET_PRINT_OPERAND_ADDRESS |
| 27994 | #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address |
| 27995 | #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P |
| 27996 | #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p |
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
| 27999 | |
| 28000 | #undef TARGET_SCHED_INIT_GLOBAL |
| 28001 | #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global |
| 28002 | #undef TARGET_SCHED_ADJUST_COST |
| 28003 | #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost |
| 28004 | #undef TARGET_SCHED_ISSUE_RATE |
| 28005 | #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate |
| 28006 | #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD |
| 28007 | #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ |
| 28008 | ia32_multipass_dfa_lookahead |
| 28009 | #undef TARGET_SCHED_MACRO_FUSION_P |
| 28010 | #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p |
| 28011 | #undef TARGET_SCHED_MACRO_FUSION_PAIR_P |
| 28012 | #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p |
| 28013 | |
| 28014 | #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
| 28015 | #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall |
| 28016 | |
| 28017 | #undef TARGET_MEMMODEL_CHECK |
| 28018 | #define TARGET_MEMMODEL_CHECK ix86_memmodel_check |
| 28019 | |
| 28020 | #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV |
| 28021 | #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv |
| 28022 | |
| 28023 | #ifdef HAVE_AS_TLS |
| 28024 | #undef TARGET_HAVE_TLS |
| 28025 | #define TARGET_HAVE_TLS true |
| 28026 | #endif |
| 28027 | #undef TARGET_CANNOT_FORCE_CONST_MEM |
| 28028 | #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem |
| 28029 | #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P |
| 28030 | #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true |
| 28031 | |
| 28032 | #undef TARGET_DELEGITIMIZE_ADDRESS |
| 28033 | #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address |
| 28034 | |
| 28035 | #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P |
| 28036 | #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p |
| 28037 | |
| 28038 | #undef TARGET_MS_BITFIELD_LAYOUT_P |
| 28039 | #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p |
| 28040 | |
| 28041 | #if TARGET_MACHO |
| 28042 | #undef TARGET_BINDS_LOCAL_P |
| 28043 | #define TARGET_BINDS_LOCAL_P darwin_binds_local_p |
| 28044 | #else |
| 28045 | #undef TARGET_BINDS_LOCAL_P |
| 28046 | #define TARGET_BINDS_LOCAL_P ix86_binds_local_p |
| 28047 | #endif |
| 28048 | #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| 28049 | #undef TARGET_BINDS_LOCAL_P |
| 28050 | #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p |
| 28051 | #endif |
| 28052 | |
| 28053 | #undef TARGET_ASM_OUTPUT_MI_THUNK |
| 28054 | #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk |
| 28055 | #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
| 28056 | #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk |
| 28057 | |
| 28058 | #undef TARGET_ASM_FILE_START |
| 28059 | #define TARGET_ASM_FILE_START x86_file_start |
| 28060 | |
| 28061 | #undef TARGET_OPTION_OVERRIDE |
| 28062 | #define TARGET_OPTION_OVERRIDE ix86_option_override |
| 28063 | |
| 28064 | #undef TARGET_REGISTER_MOVE_COST |
| 28065 | #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost |
| 28066 | #undef TARGET_MEMORY_MOVE_COST |
| 28067 | #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost |
| 28068 | #undef TARGET_RTX_COSTS |
| 28069 | #define TARGET_RTX_COSTS ix86_rtx_costs |
| 28070 | #undef TARGET_INSN_COST |
| 28071 | #define TARGET_INSN_COST ix86_insn_cost |
| 28072 | #undef TARGET_ADDRESS_COST |
| 28073 | #define TARGET_ADDRESS_COST ix86_address_cost |
| 28074 | |
| 28075 | #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P |
| 28076 | #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ |
| 28077 | ix86_use_by_pieces_infrastructure_p |
| 28078 | |
| 28079 | #undef TARGET_OVERLAP_OP_BY_PIECES_P |
| 28080 | #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true |
| 28081 | |
| 28082 | #undef TARGET_FLAGS_REGNUM |
| 28083 | #define TARGET_FLAGS_REGNUM FLAGS_REG |
| 28084 | #undef TARGET_FIXED_CONDITION_CODE_REGS |
| 28085 | #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs |
| 28086 | #undef TARGET_CC_MODES_COMPATIBLE |
| 28087 | #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible |
| 28088 | |
| 28089 | #undef TARGET_MACHINE_DEPENDENT_REORG |
| 28090 | #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg |
| 28091 | |
| 28092 | #undef TARGET_BUILD_BUILTIN_VA_LIST |
| 28093 | #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list |
| 28094 | |
| 28095 | #undef TARGET_FOLD_BUILTIN |
| 28096 | #define TARGET_FOLD_BUILTIN ix86_fold_builtin |
| 28097 | |
| 28098 | #undef TARGET_GIMPLE_FOLD_BUILTIN |
| 28099 | #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin |
| 28100 | |
| 28101 | #undef TARGET_COMPARE_VERSION_PRIORITY |
| 28102 | #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority |
| 28103 | |
| 28104 | #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY |
| 28105 | #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ |
| 28106 | ix86_generate_version_dispatcher_body |
| 28107 | |
| 28108 | #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER |
| 28109 | #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ |
| 28110 | ix86_get_function_versions_dispatcher |
| 28111 | |
| 28112 | #undef TARGET_ENUM_VA_LIST_P |
| 28113 | #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list |
| 28114 | |
| 28115 | #undef TARGET_FN_ABI_VA_LIST |
| 28116 | #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list |
| 28117 | |
| 28118 | #undef TARGET_CANONICAL_VA_LIST_TYPE |
| 28119 | #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type |
| 28120 | |
| 28121 | #undef TARGET_EXPAND_BUILTIN_VA_START |
| 28122 | #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start |
| 28123 | |
| 28124 | #undef TARGET_MD_ASM_ADJUST |
| 28125 | #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust |
| 28126 | |
| 28127 | #undef TARGET_C_EXCESS_PRECISION |
| 28128 | #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision |
| 28129 | #undef TARGET_C_BITINT_TYPE_INFO |
| 28130 | #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info |
| 28131 | #undef TARGET_C_MODE_FOR_FLOATING_TYPE |
| 28132 | #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type |
| 28133 | #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE |
| 28134 | #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype |
| 28135 | #undef TARGET_PROMOTE_PROTOTYPES |
| 28136 | #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true |
| 28137 | #undef TARGET_PUSH_ARGUMENT |
| 28138 | #define TARGET_PUSH_ARGUMENT ix86_push_argument |
| 28139 | #undef TARGET_SETUP_INCOMING_VARARGS |
| 28140 | #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs |
| 28141 | #undef TARGET_MUST_PASS_IN_STACK |
| 28142 | #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack |
| 28143 | #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS |
| 28144 | #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args |
| 28145 | #undef TARGET_FUNCTION_ARG_ADVANCE |
| 28146 | #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance |
| 28147 | #undef TARGET_FUNCTION_ARG |
| 28148 | #define TARGET_FUNCTION_ARG ix86_function_arg |
| 28149 | #undef TARGET_INIT_PIC_REG |
| 28150 | #define TARGET_INIT_PIC_REG ix86_init_pic_reg |
| 28151 | #undef TARGET_USE_PSEUDO_PIC_REG |
| 28152 | #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg |
| 28153 | #undef TARGET_FUNCTION_ARG_BOUNDARY |
| 28154 | #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary |
| 28155 | #undef TARGET_PASS_BY_REFERENCE |
| 28156 | #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference |
| 28157 | #undef TARGET_INTERNAL_ARG_POINTER |
| 28158 | #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer |
| 28159 | #undef TARGET_UPDATE_STACK_BOUNDARY |
| 28160 | #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary |
| 28161 | #undef TARGET_GET_DRAP_RTX |
| 28162 | #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx |
| 28163 | #undef TARGET_STRICT_ARGUMENT_NAMING |
| 28164 | #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true |
| 28165 | #undef TARGET_STATIC_CHAIN |
| 28166 | #define TARGET_STATIC_CHAIN ix86_static_chain |
| 28167 | #undef TARGET_TRAMPOLINE_INIT |
| 28168 | #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init |
| 28169 | #undef TARGET_RETURN_POPS_ARGS |
| 28170 | #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args |
| 28171 | |
| 28172 | #undef TARGET_WARN_FUNC_RETURN |
| 28173 | #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return |
| 28174 | |
| 28175 | #undef TARGET_LEGITIMATE_COMBINED_INSN |
| 28176 | #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn |
| 28177 | |
| 28178 | #undef TARGET_ASAN_SHADOW_OFFSET |
| 28179 | #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset |
| 28180 | |
| 28181 | #undef TARGET_GIMPLIFY_VA_ARG_EXPR |
| 28182 | #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg |
| 28183 | |
| 28184 | #undef TARGET_SCALAR_MODE_SUPPORTED_P |
| 28185 | #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p |
| 28186 | |
| 28187 | #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P |
| 28188 | #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ |
| 28189 | ix86_libgcc_floating_mode_supported_p |
| 28190 | |
| 28191 | #undef TARGET_VECTOR_MODE_SUPPORTED_P |
| 28192 | #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p |
| 28193 | |
| 28194 | #undef TARGET_C_MODE_FOR_SUFFIX |
| 28195 | #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix |
| 28196 | |
| 28197 | #ifdef HAVE_AS_TLS |
| 28198 | #undef TARGET_ASM_OUTPUT_DWARF_DTPREL |
| 28199 | #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel |
| 28200 | #endif |
| 28201 | |
| 28202 | #ifdef SUBTARGET_INSERT_ATTRIBUTES |
| 28203 | #undef TARGET_INSERT_ATTRIBUTES |
| 28204 | #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES |
| 28205 | #endif |
| 28206 | |
| 28207 | #undef TARGET_MANGLE_TYPE |
| 28208 | #define TARGET_MANGLE_TYPE ix86_mangle_type |
| 28209 | |
| 28210 | #undef TARGET_EMIT_SUPPORT_TINFOS |
| 28211 | #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos |
| 28212 | |
| 28213 | #undef TARGET_STACK_PROTECT_GUARD |
| 28214 | #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard |
| 28215 | |
| 28216 | #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P |
| 28217 | #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \ |
| 28218 | ix86_stack_protect_runtime_enabled_p |
| 28219 | |
| 28220 | #if !TARGET_MACHO |
| 28221 | #undef TARGET_STACK_PROTECT_FAIL |
| 28222 | #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail |
| 28223 | #endif |
| 28224 | |
| 28225 | #undef TARGET_FUNCTION_VALUE |
| 28226 | #define TARGET_FUNCTION_VALUE ix86_function_value |
| 28227 | |
| 28228 | #undef TARGET_FUNCTION_VALUE_REGNO_P |
| 28229 | #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p |
| 28230 | |
| 28231 | #undef TARGET_ZERO_CALL_USED_REGS |
| 28232 | #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs |
| 28233 | |
| 28234 | #undef TARGET_PROMOTE_FUNCTION_MODE |
| 28235 | #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode |
| 28236 | |
| 28237 | #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE |
| 28238 | #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change |
| 28239 | |
| 28240 | #undef TARGET_MEMBER_TYPE_FORCES_BLK |
| 28241 | #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk |
| 28242 | |
#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
/* When this hook returns true for MODE, the compiler allows
   registers explicitly used in the rtl to be used as spill registers
   but prevents the compiler from extending the lifetime of these
   registers. */
#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
#undef TARGET_CALLEE_SAVE_COST
#define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
  ix86_split_reduction
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  ix86_autovectorize_vector_modes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable

#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
| 28395 | |
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_INSN_CALLEE_ABI
#define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name

#undef TARGET_IFUNC_REF_LOCAL_OK
#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok

#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_ASM_RELOC_RW_MASK
# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
#endif

#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
#define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses

#undef TARGET_MEMTAG_ADD_TAG
#define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag

#undef TARGET_MEMTAG_SET_TAG
#define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag

#undef TARGET_MEMTAG_EXTRACT_TAG
#define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag

#undef TARGET_MEMTAG_UNTAGGED_POINTER
#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer

#undef TARGET_MEMTAG_TAG_BITSIZE
#define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize

#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next

#undef TARGET_HAVE_CCMP
#define TARGET_HAVE_CCMP ix86_have_ccmp

#undef TARGET_MODE_CAN_TRANSFER_BITS
#define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits

#undef TARGET_REDZONE_CLOBBER
#define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
| 28496 | |
| 28497 | static bool |
| 28498 | ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED) |
| 28499 | { |
| 28500 | #ifdef OPTION_GLIBC |
| 28501 | if (OPTION_GLIBC) |
| 28502 | return (built_in_function)fcode == BUILT_IN_MEMPCPY; |
| 28503 | else |
| 28504 | return false; |
| 28505 | #else |
| 28506 | return false; |
| 28507 | #endif |
| 28508 | } |
| 28509 | |
| 28510 | #undef TARGET_LIBC_HAS_FAST_FUNCTION |
| 28511 | #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function |
| 28512 | |
/* Implement TARGET_LIBM_FUNCTION_MAX_ERROR.  Return the maximum error,
   in ulps, of the libm implementation of function CFN for MODE.
   BOUNDARY_P is true when asking about behavior near domain/rounding
   boundaries.  A return of ~0U means the result can be arbitrarily
   wrong.  */
static unsigned
ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
			      bool boundary_p)
{
#ifdef OPTION_GLIBC
  bool glibc_p = OPTION_GLIBC;
#else
  bool glibc_p = false;
#endif
  if (glibc_p)
    {
      /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
      unsigned int libmvec_ret = 0;
      /* This flag combination is what enables the vectorized libmvec
	 entry points, so only then does the libmvec error bound apply.  */
      if (!flag_trapping_math
	  && flag_unsafe_math_optimizations
	  && flag_finite_math_only
	  && !flag_signed_zeros
	  && !flag_errno_math)
	switch (cfn)
	  {
	  CASE_CFN_COS:
	  CASE_CFN_COS_FN:
	  CASE_CFN_SIN:
	  CASE_CFN_SIN_FN:
	    if (!boundary_p)
	      {
		/* With non-default rounding modes, libmvec provides
		   complete garbage in results.  E.g.
		   _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
		   returns 0.00333309174f rather than 1.40129846e-45f.  */
		if (flag_rounding_math)
		  return ~0U;
		/* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
		   claims libmvec maximum error is 4ulps.
		   My own random testing indicates 2ulps for SFmode and
		   0.5ulps for DFmode, but let's go with the 4ulps.  */
		libmvec_ret = 4;
	      }
	    break;
	  default:
	    break;
	  }
      /* Take the worst of the scalar glibc bound and the libmvec bound.  */
      unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
							      boundary_p);
      return MAX (ret, libmvec_ret);
    }
  return default_libm_function_max_error (cfn, mode, boundary_p);
}

#undef TARGET_LIBM_FUNCTION_MAX_ERROR
#define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
| 28564 | |
#if TARGET_MACHO
/* Implement TARGET_CANNOT_COPY_INSN_P.  On 32-bit Darwin the insn that
   materializes the GOT/PIC base (an UNSPEC_SET_GOT set) must not be
   duplicated.  */
static bool
ix86_cannot_copy_insn_p (rtx_insn *insn)
{
  if (TARGET_64BIT)
    return false;

  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx src = SET_SRC (set);
  return GET_CODE (src) == UNSPEC && XINT (src, 1) == UNSPEC_SET_GOT;
}

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p

#endif
| 28587 | |
/* Register the x86 self-tests with the selftest framework, but only in
   checking builds.  */
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */

/* Name used for this target in user-facing documentation references.  */
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"
| 28595 | |
| 28596 | /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */ |
| 28597 | sbitmap |
| 28598 | ix86_get_separate_components (void) |
| 28599 | { |
| 28600 | HOST_WIDE_INT offset, to_allocate; |
| 28601 | sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER); |
| 28602 | bitmap_clear (components); |
| 28603 | struct machine_function *m = cfun->machine; |
| 28604 | |
| 28605 | offset = m->frame.stack_pointer_offset; |
| 28606 | to_allocate = offset - m->frame.sse_reg_save_offset; |
| 28607 | |
| 28608 | /* Shrink wrap separate uses MOV, which means APX PPX cannot be used. |
| 28609 | Experiments show that APX PPX can speed up the prologue. If the function |
| 28610 | does not exit early during actual execution, then using APX PPX is faster. |
| 28611 | If the function always exits early during actual execution, then shrink |
| 28612 | wrap separate reduces the number of MOV (PUSH/POP) instructions actually |
| 28613 | executed, thus speeding up execution. |
| 28614 | foo: |
| 28615 | movl $1, %eax |
| 28616 | testq %rdi, %rdi |
| 28617 | jne.L60 |
| 28618 | ret ---> early return. |
| 28619 | .L60: |
| 28620 | subq $88, %rsp ---> belong to prologue. |
| 28621 | xorl %eax, %eax |
| 28622 | movq %rbx, 40 (%rsp) ---> belong to prologue. |
| 28623 | movq 8 (%rdi), %rbx |
| 28624 | movq %rbp, 48 (%rsp) ---> belong to prologue. |
| 28625 | movq %rdi, %rbp |
| 28626 | testq %rbx, %rbx |
| 28627 | jne.L61 |
| 28628 | movq 40 (%rsp), %rbx |
| 28629 | movq 48 (%rsp), %rbp |
| 28630 | addq $88, %rsp |
| 28631 | ret |
| 28632 | .L61: |
| 28633 | movq %r12, 56 (%rsp) ---> belong to prologue. |
| 28634 | movq %r13, 64 (%rsp) ---> belong to prologue. |
| 28635 | movq %r14, 72 (%rsp) ---> belong to prologue. |
| 28636 | ... ... |
| 28637 | |
| 28638 | Disable shrink wrap separate when PPX is enabled. */ |
| 28639 | if ((TARGET_APX_PPX && !crtl->calls_eh_return) |
| 28640 | || cfun->machine->func_type != TYPE_NORMAL |
| 28641 | || TARGET_SEH |
| 28642 | || crtl->stack_realign_needed |
| 28643 | || m->call_ms2sysv) |
| 28644 | return components; |
| 28645 | |
| 28646 | /* Since shrink wrapping separate uses MOV instead of PUSH/POP. |
| 28647 | Disable shrink wrap separate when MOV is prohibited. */ |
| 28648 | if (save_regs_using_push_pop (to_allocate)) |
| 28649 | return components; |
| 28650 | |
| 28651 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 28652 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
| 28653 | { |
| 28654 | /* Skip registers with large offsets, where a pseudo may be needed. */ |
| 28655 | if (IN_RANGE (offset, -0x8000, 0x7fff)) |
| 28656 | bitmap_set_bit (map: components, bitno: regno); |
| 28657 | offset += UNITS_PER_WORD; |
| 28658 | } |
| 28659 | |
| 28660 | /* Don't mess with the following registers. */ |
| 28661 | if (frame_pointer_needed) |
| 28662 | bitmap_clear_bit (map: components, HARD_FRAME_POINTER_REGNUM); |
| 28663 | |
| 28664 | if (crtl->drap_reg) |
| 28665 | bitmap_clear_bit (map: components, REGNO (crtl->drap_reg)); |
| 28666 | |
| 28667 | if (pic_offset_table_rtx) |
| 28668 | bitmap_clear_bit (map: components, REAL_PIC_OFFSET_TABLE_REGNUM); |
| 28669 | |
| 28670 | return components; |
| 28671 | } |
| 28672 | |
| 28673 | /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */ |
| 28674 | sbitmap |
| 28675 | ix86_components_for_bb (basic_block bb) |
| 28676 | { |
| 28677 | bitmap in = DF_LIVE_IN (bb); |
| 28678 | bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; |
| 28679 | bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; |
| 28680 | |
| 28681 | sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER); |
| 28682 | bitmap_clear (components); |
| 28683 | |
| 28684 | function_abi_aggregator callee_abis; |
| 28685 | rtx_insn *insn; |
| 28686 | FOR_BB_INSNS (bb, insn) |
| 28687 | if (CALL_P (insn)) |
| 28688 | callee_abis.note_callee_abi (abi: insn_callee_abi (insn)); |
| 28689 | HARD_REG_SET = callee_abis.caller_save_regs (*crtl->abi); |
| 28690 | |
| 28691 | /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */ |
| 28692 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 28693 | if (!fixed_regs[regno] |
| 28694 | && (TEST_HARD_REG_BIT (set: extra_caller_saves, bit: regno) |
| 28695 | || bitmap_bit_p (in, regno) |
| 28696 | || bitmap_bit_p (gen, regno) |
| 28697 | || bitmap_bit_p (kill, regno))) |
| 28698 | bitmap_set_bit (map: components, bitno: regno); |
| 28699 | |
| 28700 | return components; |
| 28701 | } |
| 28702 | |
/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  x86 never needs
   to remove any of the components chosen by
   ix86_get_separate_components, so this hook is a deliberate no-op.  */
void
ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
{
  /* Nothing to do for x86.  */
}
| 28709 | |
| 28710 | /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */ |
| 28711 | void |
| 28712 | ix86_emit_prologue_components (sbitmap components) |
| 28713 | { |
| 28714 | HOST_WIDE_INT cfa_offset; |
| 28715 | struct machine_function *m = cfun->machine; |
| 28716 | |
| 28717 | cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset |
| 28718 | - m->frame.stack_pointer_offset; |
| 28719 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 28720 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
| 28721 | { |
| 28722 | if (bitmap_bit_p (map: components, bitno: regno)) |
| 28723 | ix86_emit_save_reg_using_mov (mode: word_mode, regno, cfa_offset); |
| 28724 | cfa_offset -= UNITS_PER_WORD; |
| 28725 | } |
| 28726 | } |
| 28727 | |
| 28728 | /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */ |
| 28729 | void |
| 28730 | ix86_emit_epilogue_components (sbitmap components) |
| 28731 | { |
| 28732 | HOST_WIDE_INT cfa_offset; |
| 28733 | struct machine_function *m = cfun->machine; |
| 28734 | cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset |
| 28735 | - m->frame.stack_pointer_offset; |
| 28736 | |
| 28737 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 28738 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
| 28739 | { |
| 28740 | if (bitmap_bit_p (map: components, bitno: regno)) |
| 28741 | { |
| 28742 | rtx reg = gen_rtx_REG (word_mode, regno); |
| 28743 | rtx mem; |
| 28744 | rtx_insn *insn; |
| 28745 | |
| 28746 | mem = choose_baseaddr (cfa_offset, NULL); |
| 28747 | mem = gen_frame_mem (word_mode, mem); |
| 28748 | insn = emit_move_insn (reg, mem); |
| 28749 | |
| 28750 | RTX_FRAME_RELATED_P (insn) = 1; |
| 28751 | add_reg_note (insn, REG_CFA_RESTORE, reg); |
| 28752 | } |
| 28753 | cfa_offset -= UNITS_PER_WORD; |
| 28754 | } |
| 28755 | } |
| 28756 | |
| 28757 | /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */ |
| 28758 | void |
| 28759 | ix86_set_handled_components (sbitmap components) |
| 28760 | { |
| 28761 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| 28762 | if (bitmap_bit_p (map: components, bitno: regno)) |
| 28763 | { |
| 28764 | cfun->machine->reg_is_wrapped_separately[regno] = true; |
| 28765 | cfun->machine->use_fast_prologue_epilogue = true; |
| 28766 | cfun->machine->frame.save_regs_using_mov = true; |
| 28767 | } |
| 28768 | } |
| 28769 | |
/* Hooks for shrink wrapping of separate components, implemented by the
   ix86_* functions defined immediately above.  */
#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  ix86_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  ix86_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
| 28784 | |
/* Initialize the GCC target structure from the TARGET_* macros defined
   above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* NOTE(review): presumably the gengtype-generated GC root tables for
   this file — confirm against the build system.  */
#include "gt-i386.h"
| 28788 | |