Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 262603 | Differences between
and this patch

Collapse All | Expand All

(-)gcc/doc/md.texi (+5 lines)
Lines 7505-7510 Link Here
7505
recognize complicated bypasses, e.g.@: when the consumer is only an address
7505
recognize complicated bypasses, e.g.@: when the consumer is only an address
7506
of insn @samp{store} (not a stored value).
7506
of insn @samp{store} (not a stored value).
7507
7507
7508
If there is more than one bypass with the same output and input insns, the
7509
chosen bypass is the first bypass with a guard in description whose
7510
guard function returns nonzero.  If there is no such bypass, then
7511
bypass without the guard function is chosen.
7512
7508
@findex exclusion_set
7513
@findex exclusion_set
7509
@findex presence_set
7514
@findex presence_set
7510
@findex final_presence_set
7515
@findex final_presence_set
(-)gcc/rtlanal.c (+123 lines)
Lines 728-733 Link Here
728
    }
728
    }
729
  return 0;
729
  return 0;
730
}
730
}
731
732
static int
733
reg_mentioned_by_mem_p_1 (const_rtx reg, const_rtx in,
734
			  bool *mem_p)
735
{
736
  const char *fmt;
737
  int i;
738
  enum rtx_code code;
739
740
  if (in == 0)
741
    return 0;
742
743
  if (reg == in)
744
    return 1;
745
746
  if (GET_CODE (in) == LABEL_REF)
747
    return reg == XEXP (in, 0);
748
749
  code = GET_CODE (in);
750
751
  switch (code)
752
    {
753
      /* Compare registers by number.  */
754
    case REG:
755
      return REG_P (reg) && REGNO (in) == REGNO (reg);
756
757
      /* These codes have no constituent expressions
758
	 and are unique.  */
759
    case SCRATCH:
760
    case CC0:
761
    case PC:
762
      return 0;
763
764
    case CONST_INT:
765
    case CONST_VECTOR:
766
    case CONST_DOUBLE:
767
    case CONST_FIXED:
768
      /* These are kept unique for a given value.  */
769
      return 0;
770
771
    default:
772
      break;
773
    }
774
775
  if (GET_CODE (reg) == code && rtx_equal_p (reg, in))
776
    return 1;
777
778
  fmt = GET_RTX_FORMAT (code);
779
780
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
781
    {
782
      if (fmt[i] == 'E')
783
	{
784
	  int j;
785
	  for (j = XVECLEN (in, i) - 1; j >= 0; j--)
786
	    if (reg_mentioned_by_mem_p_1 (reg, XVECEXP (in, i, j), mem_p))
787
              {
788
                if (code == MEM)
789
                  *mem_p = true;
790
791
	        return 1;
792
              }
793
	}
794
      else if (fmt[i] == 'e'
795
	       && reg_mentioned_by_mem_p_1 (reg, XEXP (in, i), mem_p))
796
	{
797
	  if (code == MEM)
798
	    *mem_p = true;
799
800
	  return 1;
801
	}
802
    }
803
  return 0;
804
}
805
806
/* Similar to the function reg_mentioned_p, return true only when
807
   register REG appears in a MEM container of RTX IN.  */
808
809
bool
810
reg_mentioned_by_mem_p (const_rtx reg, const_rtx in)
811
{
812
  bool mem = false;
813
814
  reg_mentioned_by_mem_p_1 (reg, in, &mem);
815
  return mem;
816
}
817
818
/* Return true if dest register in set_insn is used in use_insn as
819
   address calculation.
820
   For example, returns true if 
821
     set_insn: reg_a = reg_b
822
     use_insn: reg_c = (reg_a) # reg_a used in addr calculation
823
   False if
824
     set_insn: reg_a = reg_b
825
     use_insn: (reg_c) = reg_a # reg_a is used, but not as addr.  */
826
827
bool
828
reg_dep_by_addr_p (const_rtx set_insn, const_rtx use_insn)
829
{
830
  rtx pattern = PATTERN (set_insn);
831
  rtx set_dest = NULL;
832
833
  switch (GET_CODE (pattern))
834
    {
835
      case SET:
836
        set_dest = SET_DEST (pattern);
837
        break;
838
      case PARALLEL:
839
        {
840
          rtx pattern2 = XVECEXP (PATTERN (set_insn), 0,0);
841
  	  if (GET_CODE (pattern2) == SET)
842
  	    set_dest = SET_DEST (pattern2);
843
          break;
844
        }
845
      default:
846
        set_dest = NULL;
847
    }
848
849
  /* True if destination of set is reg and used as address.  */
850
  return set_dest && REG_P (set_dest) 
851
         && reg_mentioned_by_mem_p (set_dest, use_insn);
852
}
853
731
854
732
/* Return 1 if in between BEG and END, exclusive of BEG and END, there is
855
/* Return 1 if in between BEG and END, exclusive of BEG and END, there is
733
   no CODE_LABEL insn.  */
856
   no CODE_LABEL insn.  */
(-)gcc/genautomata.c (-60 / +99 lines)
Lines 1-5 Link Here
1
/* Pipeline hazard description translator.
1
/* Pipeline hazard description translator.
2
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
2
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3
   Free Software Foundation, Inc.
3
   Free Software Foundation, Inc.
4
4
5
   Written by Vladimir Makarov <vmakarov@redhat.com>
5
   Written by Vladimir Makarov <vmakarov@redhat.com>
Lines 22-42 Link Here
22
22
23
/* References:
23
/* References:
24
24
25
   1. Detecting pipeline structural hazards quickly. T. Proebsting,
25
   1. The finite state automaton based pipeline hazard recognizer and
26
      instruction scheduler in GCC.  V. Makarov.  Proceedings of GCC
27
      summit, 2003.
28
29
   2. Detecting pipeline structural hazards quickly. T. Proebsting,
26
      C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on
30
      C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on
27
      Principles of Programming Languages, pages 280--286, 1994.
31
      Principles of Programming Languages, pages 280--286, 1994.
28
32
29
      This article is a good start point to understand usage of finite
33
      This article is a good start point to understand usage of finite
30
      state automata for pipeline hazard recognizers.  But I'd
34
      state automata for pipeline hazard recognizers.  But I'd
31
      recommend the 2nd article for more deep understanding.
35
      recommend the 1st and 3rd articles for a deeper understanding.
32
36
33
   2. Efficient Instruction Scheduling Using Finite State Automata:
37
   3. Efficient Instruction Scheduling Using Finite State Automata:
34
      V. Bala and N. Rubin, Proceedings of MICRO-28.  This is the best
38
      V. Bala and N. Rubin, Proceedings of MICRO-28.  This is the best
35
      article about usage of finite state automata for pipeline hazard
39
      article about usage of finite state automata for pipeline hazard
36
      recognizers.
40
      recognizers.
37
41
38
   The current implementation is different from the 2nd article in the
42
   The current implementation is described in the 1st article and it
39
   following:
43
   is different from the 3rd article in the following:
40
44
41
   1. New operator `|' (alternative) is permitted in functional unit
45
   1. New operator `|' (alternative) is permitted in functional unit
42
      reservation which can be treated deterministically and
46
      reservation which can be treated deterministically and
Lines 463-469 Link Here
463
     insn.  */
467
     insn.  */
464
  int insn_num;
468
  int insn_num;
465
  /* The following field value is list of bypasses in which given insn
469
  /* The following field value is list of bypasses in which given insn
466
     is output insn.  */
470
     is output insn.  Bypasses with the same input insn stay one after
471
     another in the list in the same order as their occurrences in the
472
     description but the bypass without a guard stays always the last
473
     in a row of bypasses with the same input insn.  */
467
  struct bypass_decl *bypass_list;
474
  struct bypass_decl *bypass_list;
468
475
469
  /* The following fields are defined by automaton generator.  */
476
  /* The following fields are defined by automaton generator.  */
Lines 2367-2384 Link Here
2367
}
2374
}
2368
2375
2369
2376
2370
/* The function searches for bypass with given IN_INSN_RESERV in given
2377
/* The function inserts BYPASS in the list of bypasses of the
2371
   BYPASS_LIST.  */
2378
   corresponding output insn.  The order of bypasses in the list is
2372
static struct bypass_decl *
2379
   described in a comment for member `bypass_list' (see above).  If
2373
find_bypass (struct bypass_decl *bypass_list,
2380
   there is already the same bypass in the list the function reports
2374
	     struct insn_reserv_decl *in_insn_reserv)
2381
   this and does nothing.  */
2382
static void
2383
insert_bypass (struct bypass_decl *bypass)
2375
{
2384
{
2376
  struct bypass_decl *bypass;
2385
  struct bypass_decl *curr, *last;
2377
2386
  struct insn_reserv_decl *out_insn_reserv = bypass->out_insn_reserv;
2378
  for (bypass = bypass_list; bypass != NULL; bypass = bypass->next)
2387
  struct insn_reserv_decl *in_insn_reserv = bypass->in_insn_reserv;
2379
    if (bypass->in_insn_reserv == in_insn_reserv)
2388
  
2380
      break;
2389
  for (curr = out_insn_reserv->bypass_list, last = NULL;
2381
  return bypass;
2390
       curr != NULL;
2391
       last = curr, curr = curr->next)
2392
    if (curr->in_insn_reserv == in_insn_reserv)
2393
      {
2394
	if ((bypass->bypass_guard_name != NULL
2395
	     && curr->bypass_guard_name != NULL
2396
	     && ! strcmp (bypass->bypass_guard_name, curr->bypass_guard_name))
2397
	    || bypass->bypass_guard_name == curr->bypass_guard_name)
2398
	  {
2399
	    if (bypass->bypass_guard_name == NULL)
2400
	      {
2401
		if (!w_flag)
2402
		  error ("the same bypass `%s - %s' is already defined",
2403
			 bypass->out_insn_name, bypass->in_insn_name);
2404
		else
2405
		  warning (0, "the same bypass `%s - %s' is already defined",
2406
			   bypass->out_insn_name, bypass->in_insn_name);
2407
	      }
2408
	    else if (!w_flag)
2409
	      error ("the same bypass `%s - %s' (guard %s) is already defined",
2410
		     bypass->out_insn_name, bypass->in_insn_name,
2411
		     bypass->bypass_guard_name);
2412
	    else
2413
	      warning
2414
		(0, "the same bypass `%s - %s' (guard %s) is already defined",
2415
		 bypass->out_insn_name, bypass->in_insn_name,
2416
		 bypass->bypass_guard_name);
2417
	    return;
2418
	  }
2419
	if (curr->bypass_guard_name == NULL)
2420
	  break;
2421
	if (curr->next == NULL || curr->next->in_insn_reserv != in_insn_reserv)
2422
	  {
2423
	    last = curr;
2424
	    break;
2425
	  }
2426
	  
2427
      }
2428
  if (last == NULL)
2429
    {
2430
      bypass->next = out_insn_reserv->bypass_list;
2431
      out_insn_reserv->bypass_list = bypass;
2432
    }
2433
  else
2434
    {
2435
      bypass->next = last->next;
2436
      last->next = bypass;
2437
    }
2382
}
2438
}
2383
2439
2384
/* The function processes pipeline description declarations, checks
2440
/* The function processes pipeline description declarations, checks
Lines 2391-2397 Link Here
2391
  decl_t decl_in_table;
2447
  decl_t decl_in_table;
2392
  decl_t out_insn_reserv;
2448
  decl_t out_insn_reserv;
2393
  decl_t in_insn_reserv;
2449
  decl_t in_insn_reserv;
2394
  struct bypass_decl *bypass;
2395
  int automaton_presence;
2450
  int automaton_presence;
2396
  int i;
2451
  int i;
2397
2452
Lines 2514-2549 Link Here
2514
		= DECL_INSN_RESERV (out_insn_reserv);
2569
		= DECL_INSN_RESERV (out_insn_reserv);
2515
	      DECL_BYPASS (decl)->in_insn_reserv
2570
	      DECL_BYPASS (decl)->in_insn_reserv
2516
		= DECL_INSN_RESERV (in_insn_reserv);
2571
		= DECL_INSN_RESERV (in_insn_reserv);
2517
	      bypass
2572
	      insert_bypass (DECL_BYPASS (decl));
2518
		= find_bypass (DECL_INSN_RESERV (out_insn_reserv)->bypass_list,
2519
			       DECL_BYPASS (decl)->in_insn_reserv);
2520
	      if (bypass != NULL)
2521
		{
2522
		  if (DECL_BYPASS (decl)->latency == bypass->latency)
2523
		    {
2524
		      if (!w_flag)
2525
			error
2526
			  ("the same bypass `%s - %s' is already defined",
2527
			   DECL_BYPASS (decl)->out_insn_name,
2528
			   DECL_BYPASS (decl)->in_insn_name);
2529
		      else
2530
			warning
2531
			  (0, "the same bypass `%s - %s' is already defined",
2532
			   DECL_BYPASS (decl)->out_insn_name,
2533
			   DECL_BYPASS (decl)->in_insn_name);
2534
		    }
2535
		  else
2536
		    error ("bypass `%s - %s' is already defined",
2537
			   DECL_BYPASS (decl)->out_insn_name,
2538
			   DECL_BYPASS (decl)->in_insn_name);
2539
		}
2540
	      else
2541
		{
2542
		  DECL_BYPASS (decl)->next
2543
		    = DECL_INSN_RESERV (out_insn_reserv)->bypass_list;
2544
		  DECL_INSN_RESERV (out_insn_reserv)->bypass_list
2545
		    = DECL_BYPASS (decl);
2546
		}
2547
	    }
2573
	    }
2548
	}
2574
	}
2549
    }
2575
    }
Lines 8159-8177 Link Here
8159
			    (advance_cycle_insn_decl)->insn_num));
8185
			    (advance_cycle_insn_decl)->insn_num));
8160
	    fprintf (output_file, "        case %d:\n",
8186
	    fprintf (output_file, "        case %d:\n",
8161
		     bypass->in_insn_reserv->insn_num);
8187
		     bypass->in_insn_reserv->insn_num);
8162
	    if (bypass->bypass_guard_name == NULL)
8188
	    for (;;)
8163
	      fprintf (output_file, "          return %d;\n",
8164
		       bypass->latency);
8165
	    else
8166
	      {
8189
	      {
8167
		fprintf (output_file,
8190
		if (bypass->bypass_guard_name == NULL)
8168
			 "          if (%s (%s, %s))\n",
8191
		  {
8169
			 bypass->bypass_guard_name, INSN_PARAMETER_NAME,
8192
		    gcc_assert (bypass->next == NULL
8170
			 INSN2_PARAMETER_NAME);
8193
				|| (bypass->in_insn_reserv
8171
		fprintf (output_file,
8194
				    != bypass->next->in_insn_reserv));
8172
			 "            return %d;\n          break;\n",
8195
		    fprintf (output_file, "          return %d;\n",
8173
			 bypass->latency);
8196
			     bypass->latency);
8197
		  }
8198
		else
8199
		  {
8200
		    fprintf (output_file,
8201
			     "          if (%s (%s, %s))\n",
8202
			     bypass->bypass_guard_name, INSN_PARAMETER_NAME,
8203
			     INSN2_PARAMETER_NAME);
8204
		    fprintf (output_file, "            return %d;\n",
8205
			     bypass->latency);
8206
		  }
8207
		if (bypass->next == NULL
8208
		    || bypass->in_insn_reserv != bypass->next->in_insn_reserv)
8209
		  break;
8210
		bypass = bypass->next;
8174
	      }
8211
	      }
8212
	    if (bypass->bypass_guard_name != NULL)
8213
	      fprintf (output_file, "          break;\n");
8175
	  }
8214
	  }
8176
	fputs ("        }\n      break;\n", output_file);
8215
	fputs ("        }\n      break;\n", output_file);
8177
      }
8216
      }
(-)gcc/rtl.def (-1 / +5 lines)
Lines 1088-1094 Link Here
1088
   guard for the bypass.  The function will get the two insns as
1088
   guard for the bypass.  The function will get the two insns as
1089
   parameters.  If the function returns zero the bypass will be
1089
   parameters.  If the function returns zero the bypass will be
1090
   ignored for this case.  Additional guard is necessary to recognize
1090
   ignored for this case.  Additional guard is necessary to recognize
1091
   complicated bypasses, e.g. when consumer is load address.  */
1091
   complicated bypasses, e.g. when consumer is load address.  If there
1092
   is more than one bypass with the same output and input insns, the
1093
   chosen bypass is the first bypass with a guard in description whose
1094
   guard function returns nonzero.  If there is no such bypass, then
1095
   bypass without the guard function is chosen.  */
1092
DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", RTX_EXTRA)
1096
DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", RTX_EXTRA)
1093
1097
1094
/* (define_automaton string) describes names of automata generated and
1098
/* (define_automaton string) describes names of automata generated and
(-)gcc/ChangeLog.atom (+134 lines)
Line 0 Link Here
1
2009-02-05  Joey Ye  <joey.ye@intel.com>
2
	    Xuepeng Guo <xuepeng.guo@intel.com>
3
	    H.J. Lu  <hongjiu.lu@intel.com>
4
5
	Atom pipeline model, tuning and insn selection.
6
	* rtlanal.c (reg_mentioned_by_mem_p_1): New function.
7
	(reg_mentioned_by_mem_p): New function.
8
	(reg_dep_by_addr_p): New function.
9
10
	* rtl.h (reg_mentioned_by_mem_p): Declare new function.
11
	(reg_dep_by_addr_p): Likewise.
12
13
	* config.gcc (atom): Add atom config options and target.
14
15
	* config/i386/i386.h (TARGET_ATOM): New target macro.
16
	(X86_TUNE_OPT_AGU): New tuning flag.
17
	(TARGET_OPT_AGU): New target option.
18
	(TARGET_CPU_DEFAULT_atom): New CPU default.
19
	(PROCESSOR_ATOM): New processor.
20
21
	* config/i386/i386-c.c (ix86_target_macros_internal): New case
22
	PROCESSOR_ATOM.
23
	(ix86_target_macros_internal): Likewise.
24
25
	* config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new
26
	function.
27
	(ix86_dep_by_shift_count): Likewise.
28
	(ix86_agi_dependent): Likewise.
29
30
	* config/i386/i386.c (atom_cost): New cost.
31
	(m_ATOM): New macro flag.
32
	(initial_ix86_tune_fe): Set m_ATOM.
33
	(x86_accumulate_outgoing_args): Likewise.
34
	(x86_arch_always_fancy_math_387): Likewise.
35
	(processor_target): Add Atom cost.
36
	(cpu_names): Add Atom cpu name.
37
	(override_options): Set Atom ISA.
38
	(LEA_SEARCH_THRESHOLD): New macro.
39
	(distance_non_agu_define): New function.
40
	(distance_agu_use): Likewise.
41
	(ix86_lea_for_add_ok): Likewise.
42
	(ix86_dep_by_shift_count): Likewise.
43
	(ix86_agi_dependent): Make it global.
44
	(ix86_issue_rate): New case PROCESSOR_ATOM.
45
	(ix86_adjust_cost): Likewise.
46
47
	* config/i386/i386.md (cpu): Add new value "atom".
48
	(atom.md): Include atom.md.
49
	(use_carry, movu): New attr.
50
	(adddi3_carry_rex64): Set attr "use_carry".
51
	(addqi3_carry): Likewise.
52
	(addhi3_carry): Likewise.
53
	(addsi3_carry): Likewise.
54
	(*addsi3_carry_zext): Likewise.
55
	(subdi3_carry_rex64): Likewise.
56
	(subqi3_carry): Likewise.
57
	(subhi3_carry): Likewise.
58
	(subsi3_carry): Likewise.
59
	(x86_movdicc_0_m1_rex64): Likewise.
60
	(*x86_movdicc_0_m1_se): Likewise.
61
	(x86_movsicc_0_m1): Likewise.
62
	(*x86_movsicc_0_m1_se): Likewise.
63
	(*adddi_1_rex64): Emit add insn as much as possible.
64
	(*addsi_1): Likewise.
65
	(return_internal): Set atom_unit.
66
	(return_internal_long): Likewise.
67
	(return_pop_internal): Likewise.
68
	(*rcpsf2_sse): Set atom_sse_attr attr.
69
	(*sqrt<mode>2_sse): Likewise.
70
	(*prefetch_sse): Likewise.
71
72
	* config/i386/sse.md (cpu): Set attr "atom_sse_attr".
73
	(*prefetch_sse_rex): Likewise.
74
	(sse_rcpv4sf2): Likewise.
75
	(sse_vmrcpv4sf2): Likewise.
76
	(sse_sqrtv4sf2): Likewise.
77
	(<sse>_vmsqrt<mode>2): Likewise.
78
	(sse_ldmxcsr): Likewise.
79
	(sse_stmxcsr): Likewise.
80
	(*sse_sfence): Likewise.
81
	(sse2_clflush): Likewise.
82
	(*sse2_mfence): Likewise.
83
	(*sse2_lfence): Likewise.
84
	(avx_movup<avxmodesuffixf2c><avxmodesuffix>): Set attr "movu".
85
	(<sse>_movup<ssemodesuffixf2c>): Likewise.
86
	(avx_movdqu<avxmodesuffix>): Likewise.
87
	(avx_lddqu<avxmodesuffix>): Likewise.
88
	(sse2_movntv2di): Change attr "type" to "ssemov".
89
	(sse2_movntsi): Likewise.
90
	(rsqrtv8sf2): Change attr "type" to "sseadd".
91
	(sse3_addsubv2df3): Set attr "atom_unit".
92
	(sse3_h<plusminus_insn>v4sf3): Likewise.
93
	(*sse2_pmaddwd): Likewise.
94
	(*vec_extractv2di_1_rex64): Likewise.
95
	(*vec_extractv2di_1_avx): Likewise.
96
	(sse2_psadbw): Likewise.
97
	(ssse3_phaddwv8hi3): Likewise.
98
	(ssse3_phaddwv4hi3): Likewise.
99
	(ssse3_phadddv4si3): Likewise.
100
	(ssse3_phadddv2si3): Likewise.
101
	(ssse3_phaddswv8hi3): Likewise.
102
	(ssse3_phaddswv4hi3): Likewise.
103
	(ssse3_phsubwv8hi3): Likewise.
104
	(ssse3_phsubwv4hi3): Likewise.
105
	(ssse3_phsubdv4si3): Likewise.
106
	(ssse3_phsubdv2si3): Likewise.
107
	(ssse3_phsubswv8hi3): Likewise.
108
	(ssse3_phsubswv4hi3): Likewise.
109
	(ssse3_pmaddubsw128): Likewise.
110
	(sse3_pmaddubsw): Likewise.
111
	(ssse3_palignrti): Likewise.
112
	(ssse3_palignrdi): Likewise.
113
114
	* config/i386/atom.md: New.
115
116
2009-02-05  H.J. Lu  <hongjiu.lu@intel.com>
117
118
	* config/i386/i386.c (ix86_agi_dependent): Remove the third
119
	argument.  Swap the first 2 arguments.
120
	(ix86_adjust_cost): Updated.
121
122
2009-01-30  Vladimir Makarov  <vmakarov@redhat.com>
123
124
	* genautomata.c: Add a new year to the copyright.  Add a new
125
	reference.
126
	(struct insn_reserv_decl): Add comments for member bypass_list.
127
	(find_bypass): Remove.
128
	(insert_bypass): New.
129
	(process_decls): Use insert_bypass.
130
	(output_internal_insn_latency_func): Output all bypasses with the
131
	same input insn in one switch case.
132
133
	* rtl.def (define_bypass): Describe bypass choice.
134
	* doc/md.texi (define_bypass): Ditto.
(-)gcc/rtl.h (+2 lines)
Lines 1731-1736 Link Here
1731
extern bool offset_within_block_p (const_rtx, HOST_WIDE_INT);
1731
extern bool offset_within_block_p (const_rtx, HOST_WIDE_INT);
1732
extern void split_const (rtx, rtx *, rtx *);
1732
extern void split_const (rtx, rtx *, rtx *);
1733
extern int reg_mentioned_p (const_rtx, const_rtx);
1733
extern int reg_mentioned_p (const_rtx, const_rtx);
1734
extern bool reg_mentioned_by_mem_p (const_rtx, const_rtx);
1735
extern bool reg_dep_by_addr_p (const_rtx, const_rtx);
1734
extern int count_occurrences (const_rtx, const_rtx, int);
1736
extern int count_occurrences (const_rtx, const_rtx, int);
1735
extern int reg_referenced_p (const_rtx, const_rtx);
1737
extern int reg_referenced_p (const_rtx, const_rtx);
1736
extern int reg_used_between_p (const_rtx, const_rtx, const_rtx);
1738
extern int reg_used_between_p (const_rtx, const_rtx, const_rtx);
(-)gcc/config.gcc (-5 / +5 lines)
Lines 1087-1093 Link Here
1087
			tmake_file="${tmake_file} i386/t-linux64"
1087
			tmake_file="${tmake_file} i386/t-linux64"
1088
			need_64bit_hwint=yes
1088
			need_64bit_hwint=yes
1089
			case X"${with_cpu}" in
1089
			case X"${with_cpu}" in
1090
			Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
1090
			Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
1091
				;;
1091
				;;
1092
			X)
1092
			X)
1093
				if test x$with_cpu_64 = x; then
1093
				if test x$with_cpu_64 = x; then
Lines 1096-1102 Link Here
1096
				;;
1096
				;;
1097
			*)
1097
			*)
1098
				echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
1098
				echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
1099
				echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
1099
				echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
1100
				exit 1
1100
				exit 1
1101
				;;
1101
				;;
1102
			esac
1102
			esac
Lines 1201-1207 Link Here
1201
		# libgcc/configure.ac instead.
1201
		# libgcc/configure.ac instead.
1202
		need_64bit_hwint=yes
1202
		need_64bit_hwint=yes
1203
		case X"${with_cpu}" in
1203
		case X"${with_cpu}" in
1204
		Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
1204
		Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
1205
			;;
1205
			;;
1206
		X)
1206
		X)
1207
			if test x$with_cpu_64 = x; then
1207
			if test x$with_cpu_64 = x; then
Lines 1210-1216 Link Here
1210
			;;
1210
			;;
1211
		*)
1211
		*)
1212
			echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
1212
			echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
1213
			echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
1213
			echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
1214
			exit 1
1214
			exit 1
1215
			;;
1215
			;;
1216
		esac
1216
		esac
Lines 2803-2809 Link Here
2803
				esac
2803
				esac
2804
				# OK
2804
				# OK
2805
				;;
2805
				;;
2806
			"" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
2806
			"" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | atom | generic)
2807
				# OK
2807
				# OK
2808
				;;
2808
				;;
2809
			*)
2809
			*)
(-)gcc/config/i386/i386.h (+5 lines)
Lines 231-236 Link Here
231
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
231
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
232
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
232
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
233
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
233
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
234
#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
234
235
235
/* Feature tests against the various tunings.  */
236
/* Feature tests against the various tunings.  */
236
enum ix86_tune_indices {
237
enum ix86_tune_indices {
Lines 295-300 Link Here
295
  X86_TUNE_USE_VECTOR_FP_CONVERTS,
296
  X86_TUNE_USE_VECTOR_FP_CONVERTS,
296
  X86_TUNE_USE_VECTOR_CONVERTS,
297
  X86_TUNE_USE_VECTOR_CONVERTS,
297
  X86_TUNE_FUSE_CMP_AND_BRANCH,
298
  X86_TUNE_FUSE_CMP_AND_BRANCH,
299
  X86_TUNE_OPT_AGU,
298
300
299
  X86_TUNE_LAST
301
  X86_TUNE_LAST
300
};
302
};
Lines 382-387 Link Here
382
	ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
384
	ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
383
#define TARGET_FUSE_CMP_AND_BRANCH \
385
#define TARGET_FUSE_CMP_AND_BRANCH \
384
	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
386
	ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
387
#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
385
388
386
/* Feature tests against the various architecture variations.  */
389
/* Feature tests against the various architecture variations.  */
387
enum ix86_arch_indices {
390
enum ix86_arch_indices {
Lines 564-569 Link Here
564
  TARGET_CPU_DEFAULT_prescott,
567
  TARGET_CPU_DEFAULT_prescott,
565
  TARGET_CPU_DEFAULT_nocona,
568
  TARGET_CPU_DEFAULT_nocona,
566
  TARGET_CPU_DEFAULT_core2,
569
  TARGET_CPU_DEFAULT_core2,
570
  TARGET_CPU_DEFAULT_atom,
567
571
568
  TARGET_CPU_DEFAULT_geode,
572
  TARGET_CPU_DEFAULT_geode,
569
  TARGET_CPU_DEFAULT_k6,
573
  TARGET_CPU_DEFAULT_k6,
Lines 2256-2261 Link Here
2256
  PROCESSOR_GENERIC32,
2260
  PROCESSOR_GENERIC32,
2257
  PROCESSOR_GENERIC64,
2261
  PROCESSOR_GENERIC64,
2258
  PROCESSOR_AMDFAM10,
2262
  PROCESSOR_AMDFAM10,
2263
  PROCESSOR_ATOM,
2259
  PROCESSOR_max
2264
  PROCESSOR_max
2260
};
2265
};
2261
2266
(-)gcc/config/i386/i386.md (-15 / +58 lines)
Lines 297-303 Link Here
297
297
298
298
299
;; Processor type.
299
;; Processor type.
300
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
300
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom,
301
		    generic64,amdfam10"
301
		    generic64,amdfam10"
302
  (const (symbol_ref "ix86_schedule")))
302
  (const (symbol_ref "ix86_schedule")))
303
303
Lines 593-598 Link Here
593
(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
593
(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
594
  (const_string "any"))
594
  (const_string "any"))
595
595
596
;; Define attribute to classify add/sub insns that consumes carry flag (CF)
597
(define_attr "use_carry" "0,1" (const_string "0"))
598
599
;; Define attribute to indicate unaligned ssemov insns
600
(define_attr "movu" "0,1" (const_string "0"))
601
596
;; Describe a user's asm statement.
602
;; Describe a user's asm statement.
597
(define_asm_attributes
603
(define_asm_attributes
598
  [(set_attr "length" "128")
604
  [(set_attr "length" "128")
Lines 708-713 Link Here
708
(include "k6.md")
714
(include "k6.md")
709
(include "athlon.md")
715
(include "athlon.md")
710
(include "geode.md")
716
(include "geode.md")
717
(include "atom.md")
711
718
712
719
713
;; Operand and operator predicates and constraints
720
;; Operand and operator predicates and constraints
Lines 5775-5780 Link Here
5775
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
5782
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
5776
  "adc{q}\t{%2, %0|%0, %2}"
5783
  "adc{q}\t{%2, %0|%0, %2}"
5777
  [(set_attr "type" "alu")
5784
  [(set_attr "type" "alu")
5785
   (set_attr "use_carry" "1")
5778
   (set_attr "pent_pair" "pu")
5786
   (set_attr "pent_pair" "pu")
5779
   (set_attr "mode" "DI")])
5787
   (set_attr "mode" "DI")])
5780
5788
Lines 5849-5854 Link Here
5849
  "ix86_binary_operator_ok (PLUS, QImode, operands)"
5857
  "ix86_binary_operator_ok (PLUS, QImode, operands)"
5850
  "adc{b}\t{%2, %0|%0, %2}"
5858
  "adc{b}\t{%2, %0|%0, %2}"
5851
  [(set_attr "type" "alu")
5859
  [(set_attr "type" "alu")
5860
   (set_attr "use_carry" "1")
5852
   (set_attr "pent_pair" "pu")
5861
   (set_attr "pent_pair" "pu")
5853
   (set_attr "mode" "QI")])
5862
   (set_attr "mode" "QI")])
5854
5863
Lines 5861-5866 Link Here
5861
  "ix86_binary_operator_ok (PLUS, HImode, operands)"
5870
  "ix86_binary_operator_ok (PLUS, HImode, operands)"
5862
  "adc{w}\t{%2, %0|%0, %2}"
5871
  "adc{w}\t{%2, %0|%0, %2}"
5863
  [(set_attr "type" "alu")
5872
  [(set_attr "type" "alu")
5873
   (set_attr "use_carry" "1")
5864
   (set_attr "pent_pair" "pu")
5874
   (set_attr "pent_pair" "pu")
5865
   (set_attr "mode" "HI")])
5875
   (set_attr "mode" "HI")])
5866
5876
Lines 5873-5878 Link Here
5873
  "ix86_binary_operator_ok (PLUS, SImode, operands)"
5883
  "ix86_binary_operator_ok (PLUS, SImode, operands)"
5874
  "adc{l}\t{%2, %0|%0, %2}"
5884
  "adc{l}\t{%2, %0|%0, %2}"
5875
  [(set_attr "type" "alu")
5885
  [(set_attr "type" "alu")
5886
   (set_attr "use_carry" "1")
5876
   (set_attr "pent_pair" "pu")
5887
   (set_attr "pent_pair" "pu")
5877
   (set_attr "mode" "SI")])
5888
   (set_attr "mode" "SI")])
5878
5889
Lines 5886-5891 Link Here
5886
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
5897
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
5887
  "adc{l}\t{%2, %k0|%k0, %2}"
5898
  "adc{l}\t{%2, %k0|%k0, %2}"
5888
  [(set_attr "type" "alu")
5899
  [(set_attr "type" "alu")
5900
   (set_attr "use_carry" "1")
5889
   (set_attr "pent_pair" "pu")
5901
   (set_attr "pent_pair" "pu")
5890
   (set_attr "mode" "SI")])
5902
   (set_attr "mode" "SI")])
5891
5903
Lines 6115-6123 Link Here
6115
   (set_attr "mode" "SI")])
6127
   (set_attr "mode" "SI")])
6116
6128
6117
(define_insn "*adddi_1_rex64"
6129
(define_insn "*adddi_1_rex64"
6118
  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
6130
  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
6119
	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
6131
	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r")
6120
		 (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
6132
		 (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le")))
6121
   (clobber (reg:CC FLAGS_REG))]
6133
   (clobber (reg:CC FLAGS_REG))]
6122
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
6134
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
6123
{
6135
{
Lines 6138-6143 Link Here
6138
	}
6150
	}
6139
6151
6140
    default:
6152
    default:
6153
      /* Use add as much as possible to replace lea for AGU optimization. */
6154
      if (which_alternative == 2 && TARGET_OPT_AGU)
6155
        return "add{q}\t{%1, %0|%0, %1}";
6156
        
6141
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
6157
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
6142
6158
6143
      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
6159
      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
Lines 6156-6163 Link Here
6156
    }
6172
    }
6157
}
6173
}
6158
  [(set (attr "type")
6174
  [(set (attr "type")
6159
     (cond [(eq_attr "alternative" "2")
6175
     (cond [(and (eq_attr "alternative" "2") 
6176
                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
6160
	      (const_string "lea")
6177
	      (const_string "lea")
6178
            (eq_attr "alternative" "3")
6179
              (const_string "lea")
6161
	    ; Current assemblers are broken and do not allow @GOTOFF in
6180
	    ; Current assemblers are broken and do not allow @GOTOFF in
6162
	    ; ought but a memory context.
6181
	    ; ought but a memory context.
6163
	    (match_operand:DI 2 "pic_symbolic_operand" "")
6182
	    (match_operand:DI 2 "pic_symbolic_operand" "")
Lines 6174-6181 Link Here
6174
	(plus:DI (match_operand:DI 1 "register_operand" "")
6193
	(plus:DI (match_operand:DI 1 "register_operand" "")
6175
		 (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
6194
		 (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
6176
   (clobber (reg:CC FLAGS_REG))]
6195
   (clobber (reg:CC FLAGS_REG))]
6177
  "TARGET_64BIT && reload_completed
6196
  "TARGET_64BIT && reload_completed 
6178
   && true_regnum (operands[0]) != true_regnum (operands[1])"
6197
   && ix86_lea_for_add_ok (PLUS, insn, operands)"
6179
  [(set (match_dup 0)
6198
  [(set (match_dup 0)
6180
	(plus:DI (match_dup 1)
6199
	(plus:DI (match_dup 1)
6181
		 (match_dup 2)))]
6200
		 (match_dup 2)))]
Lines 6379-6387 Link Here
6379
6398
6380
6399
6381
(define_insn "*addsi_1"
6400
(define_insn "*addsi_1"
6382
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
6401
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
6383
	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
6402
	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r")
6384
		 (match_operand:SI 2 "general_operand" "g,ri,li")))
6403
		 (match_operand:SI 2 "general_operand" "g,ri,0,li")))
6385
   (clobber (reg:CC FLAGS_REG))]
6404
   (clobber (reg:CC FLAGS_REG))]
6386
  "ix86_binary_operator_ok (PLUS, SImode, operands)"
6405
  "ix86_binary_operator_ok (PLUS, SImode, operands)"
6387
{
6406
{
Lines 6402-6407 Link Here
6402
	}
6421
	}
6403
6422
6404
    default:
6423
    default:
6424
      /* Use add as much as possible to replace lea for AGU optimization. */
6425
      if (which_alternative == 2 && TARGET_OPT_AGU)
6426
        return "add{l}\t{%1, %0|%0, %1}";
6427
6405
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
6428
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
6406
6429
6407
      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
6430
      /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
Lines 6418-6424 Link Here
6418
    }
6441
    }
6419
}
6442
}
6420
  [(set (attr "type")
6443
  [(set (attr "type")
6421
     (cond [(eq_attr "alternative" "2")
6444
     (cond [(and (eq_attr "alternative" "2") 
6445
                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
6446
               (const_string "lea")
6447
            (eq_attr "alternative" "3")
6422
	      (const_string "lea")
6448
	      (const_string "lea")
6423
	    ; Current assemblers are broken and do not allow @GOTOFF in
6449
	    ; Current assemblers are broken and do not allow @GOTOFF in
6424
	    ; ought but a memory context.
6450
	    ; ought but a memory context.
Lines 6436-6443 Link Here
6436
	(plus (match_operand 1 "register_operand" "")
6462
	(plus (match_operand 1 "register_operand" "")
6437
              (match_operand 2 "nonmemory_operand" "")))
6463
              (match_operand 2 "nonmemory_operand" "")))
6438
   (clobber (reg:CC FLAGS_REG))]
6464
   (clobber (reg:CC FLAGS_REG))]
6439
  "reload_completed
6465
  "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)" 
6440
   && true_regnum (operands[0]) != true_regnum (operands[1])"
6441
  [(const_int 0)]
6466
  [(const_int 0)]
6442
{
6467
{
6443
  rtx pat;
6468
  rtx pat;
Lines 7538-7543 Link Here
7538
  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
7563
  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
7539
  "sbb{q}\t{%2, %0|%0, %2}"
7564
  "sbb{q}\t{%2, %0|%0, %2}"
7540
  [(set_attr "type" "alu")
7565
  [(set_attr "type" "alu")
7566
   (set_attr "use_carry" "1")
7541
   (set_attr "pent_pair" "pu")
7567
   (set_attr "pent_pair" "pu")
7542
   (set_attr "mode" "DI")])
7568
   (set_attr "mode" "DI")])
7543
7569
Lines 7586-7591 Link Here
7586
  "ix86_binary_operator_ok (MINUS, QImode, operands)"
7612
  "ix86_binary_operator_ok (MINUS, QImode, operands)"
7587
  "sbb{b}\t{%2, %0|%0, %2}"
7613
  "sbb{b}\t{%2, %0|%0, %2}"
7588
  [(set_attr "type" "alu")
7614
  [(set_attr "type" "alu")
7615
   (set_attr "use_carry" "1")
7589
   (set_attr "pent_pair" "pu")
7616
   (set_attr "pent_pair" "pu")
7590
   (set_attr "mode" "QI")])
7617
   (set_attr "mode" "QI")])
7591
7618
Lines 7598-7603 Link Here
7598
  "ix86_binary_operator_ok (MINUS, HImode, operands)"
7625
  "ix86_binary_operator_ok (MINUS, HImode, operands)"
7599
  "sbb{w}\t{%2, %0|%0, %2}"
7626
  "sbb{w}\t{%2, %0|%0, %2}"
7600
  [(set_attr "type" "alu")
7627
  [(set_attr "type" "alu")
7628
   (set_attr "use_carry" "1")
7601
   (set_attr "pent_pair" "pu")
7629
   (set_attr "pent_pair" "pu")
7602
   (set_attr "mode" "HI")])
7630
   (set_attr "mode" "HI")])
7603
7631
Lines 7610-7615 Link Here
7610
  "ix86_binary_operator_ok (MINUS, SImode, operands)"
7638
  "ix86_binary_operator_ok (MINUS, SImode, operands)"
7611
  "sbb{l}\t{%2, %0|%0, %2}"
7639
  "sbb{l}\t{%2, %0|%0, %2}"
7612
  [(set_attr "type" "alu")
7640
  [(set_attr "type" "alu")
7641
   (set_attr "use_carry" "1")
7613
   (set_attr "pent_pair" "pu")
7642
   (set_attr "pent_pair" "pu")
7614
   (set_attr "mode" "SI")])
7643
   (set_attr "mode" "SI")])
7615
7644
Lines 15223-15228 Link Here
15223
  "reload_completed"
15252
  "reload_completed"
15224
  "ret"
15253
  "ret"
15225
  [(set_attr "length" "1")
15254
  [(set_attr "length" "1")
15255
   (set_attr "atom_unit" "jeu")
15226
   (set_attr "length_immediate" "0")
15256
   (set_attr "length_immediate" "0")
15227
   (set_attr "modrm" "0")])
15257
   (set_attr "modrm" "0")])
15228
15258
Lines 15235-15240 Link Here
15235
  "reload_completed"
15265
  "reload_completed"
15236
  "rep\;ret"
15266
  "rep\;ret"
15237
  [(set_attr "length" "1")
15267
  [(set_attr "length" "1")
15268
   (set_attr "atom_unit" "jeu")
15238
   (set_attr "length_immediate" "0")
15269
   (set_attr "length_immediate" "0")
15239
   (set_attr "prefix_rep" "1")
15270
   (set_attr "prefix_rep" "1")
15240
   (set_attr "modrm" "0")])
15271
   (set_attr "modrm" "0")])
Lines 15245-15250 Link Here
15245
  "reload_completed"
15276
  "reload_completed"
15246
  "ret\t%0"
15277
  "ret\t%0"
15247
  [(set_attr "length" "3")
15278
  [(set_attr "length" "3")
15279
   (set_attr "atom_unit" "jeu")
15248
   (set_attr "length_immediate" "2")
15280
   (set_attr "length_immediate" "2")
15249
   (set_attr "modrm" "0")])
15281
   (set_attr "modrm" "0")])
15250
15282
Lines 16366-16371 Link Here
16366
  "TARGET_SSE_MATH"
16398
  "TARGET_SSE_MATH"
16367
  "%vrcpss\t{%1, %d0|%d0, %1}"
16399
  "%vrcpss\t{%1, %d0|%d0, %1}"
16368
  [(set_attr "type" "sse")
16400
  [(set_attr "type" "sse")
16401
   (set_attr "atom_sse_attr" "rcp")
16369
   (set_attr "prefix" "maybe_vex")
16402
   (set_attr "prefix" "maybe_vex")
16370
   (set_attr "mode" "SF")])
16403
   (set_attr "mode" "SF")])
16371
16404
Lines 16717-16722 Link Here
16717
  "TARGET_SSE_MATH"
16750
  "TARGET_SSE_MATH"
16718
  "%vrsqrtss\t{%1, %d0|%d0, %1}"
16751
  "%vrsqrtss\t{%1, %d0|%d0, %1}"
16719
  [(set_attr "type" "sse")
16752
  [(set_attr "type" "sse")
16753
   (set_attr "atom_sse_attr" "rcp")
16720
   (set_attr "prefix" "maybe_vex")
16754
   (set_attr "prefix" "maybe_vex")
16721
   (set_attr "mode" "SF")])
16755
   (set_attr "mode" "SF")])
16722
16756
Lines 16737-16742 Link Here
16737
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
16771
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
16738
  "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
16772
  "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
16739
  [(set_attr "type" "sse")
16773
  [(set_attr "type" "sse")
16774
   (set_attr "atom_sse_attr" "sqrt")
16740
   (set_attr "prefix" "maybe_vex")
16775
   (set_attr "prefix" "maybe_vex")
16741
   (set_attr "mode" "<MODE>")
16776
   (set_attr "mode" "<MODE>")
16742
   (set_attr "athlon_decode" "*")
16777
   (set_attr "athlon_decode" "*")
Lines 19732-19737 Link Here
19732
  ; Since we don't have the proper number of operands for an alu insn,
19767
  ; Since we don't have the proper number of operands for an alu insn,
19733
  ; fill in all the blanks.
19768
  ; fill in all the blanks.
19734
  [(set_attr "type" "alu")
19769
  [(set_attr "type" "alu")
19770
   (set_attr "use_carry" "1")
19735
   (set_attr "pent_pair" "pu")
19771
   (set_attr "pent_pair" "pu")
19736
   (set_attr "memory" "none")
19772
   (set_attr "memory" "none")
19737
   (set_attr "imm_disp" "false")
19773
   (set_attr "imm_disp" "false")
Lines 19747-19752 Link Here
19747
  ""
19783
  ""
19748
  "sbb{q}\t%0, %0"
19784
  "sbb{q}\t%0, %0"
19749
  [(set_attr "type" "alu")
19785
  [(set_attr "type" "alu")
19786
   (set_attr "use_carry" "1")
19750
   (set_attr "pent_pair" "pu")
19787
   (set_attr "pent_pair" "pu")
19751
   (set_attr "memory" "none")
19788
   (set_attr "memory" "none")
19752
   (set_attr "imm_disp" "false")
19789
   (set_attr "imm_disp" "false")
Lines 19790-19795 Link Here
19790
  ; Since we don't have the proper number of operands for an alu insn,
19827
  ; Since we don't have the proper number of operands for an alu insn,
19791
  ; fill in all the blanks.
19828
  ; fill in all the blanks.
19792
  [(set_attr "type" "alu")
19829
  [(set_attr "type" "alu")
19830
   (set_attr "use_carry" "1")
19793
   (set_attr "pent_pair" "pu")
19831
   (set_attr "pent_pair" "pu")
19794
   (set_attr "memory" "none")
19832
   (set_attr "memory" "none")
19795
   (set_attr "imm_disp" "false")
19833
   (set_attr "imm_disp" "false")
Lines 19805-19810 Link Here
19805
  ""
19843
  ""
19806
  "sbb{l}\t%0, %0"
19844
  "sbb{l}\t%0, %0"
19807
  [(set_attr "type" "alu")
19845
  [(set_attr "type" "alu")
19846
   (set_attr "use_carry" "1")
19808
   (set_attr "pent_pair" "pu")
19847
   (set_attr "pent_pair" "pu")
19809
   (set_attr "memory" "none")
19848
   (set_attr "memory" "none")
19810
   (set_attr "imm_disp" "false")
19849
   (set_attr "imm_disp" "false")
Lines 20137-20143 Link Here
20137
    }
20176
    }
20138
}
20177
}
20139
  [(set (attr "type")
20178
  [(set (attr "type")
20140
	(cond [(eq_attr "alternative" "0")
20179
	(cond [(and (eq_attr "alternative" "0") 
20180
	            (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
20141
		 (const_string "alu")
20181
		 (const_string "alu")
20142
	       (match_operand:SI 2 "const0_operand" "")
20182
	       (match_operand:SI 2 "const0_operand" "")
20143
		 (const_string "imov")
20183
		 (const_string "imov")
Lines 20180-20186 Link Here
20180
    }
20220
    }
20181
}
20221
}
20182
  [(set (attr "type")
20222
  [(set (attr "type")
20183
	(cond [(eq_attr "alternative" "0")
20223
	(cond [(and (eq_attr "alternative" "0")
20224
	            (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
20184
		 (const_string "alu")
20225
		 (const_string "alu")
20185
	       (match_operand:DI 2 "const0_operand" "")
20226
	       (match_operand:DI 2 "const0_operand" "")
20186
		 (const_string "imov")
20227
		 (const_string "imov")
Lines 21672-21677 Link Here
21672
  return patterns[locality];
21713
  return patterns[locality];
21673
}
21714
}
21674
  [(set_attr "type" "sse")
21715
  [(set_attr "type" "sse")
21716
   (set_attr "atom_sse_attr" "prefetch")
21675
   (set_attr "memory" "none")])
21717
   (set_attr "memory" "none")])
21676
21718
21677
(define_insn "*prefetch_sse_rex"
21719
(define_insn "*prefetch_sse_rex"
Lines 21690-21695 Link Here
21690
  return patterns[locality];
21732
  return patterns[locality];
21691
}
21733
}
21692
  [(set_attr "type" "sse")
21734
  [(set_attr "type" "sse")
21735
   (set_attr "atom_sse_attr" "prefetch")
21693
   (set_attr "memory" "none")])
21736
   (set_attr "memory" "none")])
21694
21737
21695
(define_insn "*prefetch_3dnow"
21738
(define_insn "*prefetch_3dnow"
(-)gcc/config/i386/atom.md (+796 lines)
Line 0 Link Here
1
;; Atom Scheduling
2
;; Copyright (C) 2009 Free Software Foundation, Inc.
3
;;
4
;; This file is part of GCC.
5
;;
6
;; GCC is free software; you can redistribute it and/or modify
7
;; it under the terms of the GNU General Public License as published by
8
;; the Free Software Foundation; either version 2, or (at your option)
9
;; any later version.
10
;;
11
;; GCC is distributed in the hope that it will be useful,
12
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
;; GNU General Public License for more details.
15
;;
16
;; You should have received a copy of the GNU General Public License
17
;; along with GCC; see the file COPYING.  If not, write to
18
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
19
;; Boston, MA 02110-1301, USA.  */
20
;;
21
;; Atom is an in-order core with two integer pipelines.
22
23
24
(define_attr "atom_unit" "sishuf,simul,jeu,complex,other" 
25
  (const_string "other"))
26
27
(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
28
  (const_string "other"))
29
30
(define_automaton "atom")
31
32
;;  Atom has two ports: port 0 and port 1 connecting to all execution units
33
(define_cpu_unit "atom-port-0,atom-port-1" "atom")
34
35
;;  EU: Execution Unit
36
;;  Atom EUs are connected by port 0 or port 1. 
37
38
(define_cpu_unit "atom-eu-0, atom-eu-1,
39
                  atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
40
                  "atom")
41
42
;; Some EUs have duplicated copies and can be accessed via either
43
;; port 0 or port 1
44
;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
45
46
;;; Some instructions are dual-pipe execution and need both ports
47
;;; Complex multi-op macro-instructions need both ports and all EUs
48
(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
49
(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 + 
50
                                    atom-imul-1 + atom-imul-2 + atom-imul-3 +
51
                                    atom-imul-4)")
52
53
;;; Most of simple instructions have 1 cycle latency. Some of them
54
;;; issue in port 0, some in port 1 and some in either port.
55
(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
56
(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
57
(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
58
59
;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle throughput
60
(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
61
62
;;; fmul insn can have 4 or 5 cycles latency
63
(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
64
(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
65
66
;;; fadd can have 5 cycles latency depending on instruction forms
67
(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
68
69
;;; imul insn has 5 cycles latency
70
(define_reservation "atom-imul-32" 
71
                    "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4, 
72
                     atom-port-0")
73
;;; imul instruction excludes other non-FP instructions.
74
(exclusion_set "atom-eu-0, atom-eu-1" 
75
               "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
76
77
;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on
78
;;; instruction forms
79
(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
80
(define_reservation "atom-dual-2c"
81
                    "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
82
(define_reservation "atom-dual-5c"
83
                    "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
84
85
;;; Complex macro-instructions have varying latencies and use both ports.
86
(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)")
87
88
(define_insn_reservation  "atom_other" 9
89
  (and (eq_attr "cpu" "atom")
90
       (and (eq_attr "type" "other")
91
            (eq_attr "atom_unit" "!jeu")))
92
  "atom-complex, atom-all-eu*8")
93
94
;; return has type "other" with atom_unit "jeu"
95
(define_insn_reservation  "atom_other_2" 1
96
  (and (eq_attr "cpu" "atom")
97
       (and (eq_attr "type" "other")
98
            (eq_attr "atom_unit" "jeu")))
99
  "atom-dual-1c")
100
101
(define_insn_reservation  "atom_multi" 9
102
  (and (eq_attr "cpu" "atom")
103
       (eq_attr "type" "multi"))
104
  "atom-complex, atom-all-eu*8")
105
106
;; Normal alu insns without carry
107
(define_insn_reservation  "atom_alu" 1
108
  (and (eq_attr "cpu" "atom")
109
       (and (eq_attr "type" "alu")
110
            (and (eq_attr "memory" "none")
111
                 (eq_attr "use_carry" "0"))))
112
  "atom-simple-either")
113
114
;; Normal alu insns without carry, with a memory operand
115
(define_insn_reservation  "atom_alu_mem" 1
116
  (and (eq_attr "cpu" "atom")
117
       (and (eq_attr "type" "alu")
118
            (and (eq_attr "memory" "!none")
119
                 (eq_attr "use_carry" "0"))))
120
  "atom-simple-either")
121
122
;; Alu insn consuming CF, such as adc/sbb
123
(define_insn_reservation  "atom_alu_carry" 1
124
  (and (eq_attr "cpu" "atom")
125
       (and (eq_attr "type" "alu")
126
            (and (eq_attr "memory" "none")
127
                 (eq_attr "use_carry" "1"))))
128
  "atom-simple-either")
129
130
;; Alu insn consuming CF, such as adc/sbb, with a memory operand
131
(define_insn_reservation  "atom_alu_carry_mem" 1
132
  (and (eq_attr "cpu" "atom")
133
       (and (eq_attr "type" "alu")
134
            (and (eq_attr "memory" "!none")
135
                (eq_attr "use_carry" "1"))))
136
  "atom-simple-either")
137
138
(define_insn_reservation  "atom_alu1" 1
139
  (and (eq_attr "cpu" "atom")
140
       (and (eq_attr "type" "alu1")
141
            (eq_attr "memory" "none")))
142
  "atom-simple-either")
143
144
(define_insn_reservation  "atom_alu1_mem" 1
145
  (and (eq_attr "cpu" "atom")
146
       (and (eq_attr "type" "alu1")
147
            (eq_attr "memory" "!none")))
148
  "atom-simple-either")
149
150
(define_insn_reservation  "atom_negnot" 1
151
  (and (eq_attr "cpu" "atom")
152
       (and (eq_attr "type" "negnot")
153
            (eq_attr "memory" "none")))
154
  "atom-simple-either")
155
156
(define_insn_reservation  "atom_negnot_mem" 1
157
  (and (eq_attr "cpu" "atom")
158
       (and (eq_attr "type" "negnot")
159
            (eq_attr "memory" "!none")))
160
  "atom-simple-either")
161
162
(define_insn_reservation  "atom_imov" 1
163
  (and (eq_attr "cpu" "atom")
164
       (and (eq_attr "type" "imov")
165
            (eq_attr "memory" "none")))
166
  "atom-simple-either")
167
168
(define_insn_reservation  "atom_imov_mem" 1
169
  (and (eq_attr "cpu" "atom")
170
       (and (eq_attr "type" "imov")
171
            (eq_attr "memory" "!none")))
172
  "atom-simple-either")
173
174
;; 16<-16, 32<-32
175
(define_insn_reservation  "atom_imovx" 1
176
  (and (eq_attr "cpu" "atom")
177
       (and (eq_attr "type" "imovx")
178
            (and (eq_attr "memory" "none")
179
                 (ior (and (match_operand:HI 0 "register_operand")
180
                           (match_operand:HI 1 "general_operand"))
181
                      (and (match_operand:SI 0 "register_operand")
182
                           (match_operand:SI 1 "general_operand"))))))
183
  "atom-simple-either")
184
185
;; 16<-16, 32<-32, mem
186
(define_insn_reservation  "atom_imovx_mem" 1
187
  (and (eq_attr "cpu" "atom")
188
       (and (eq_attr "type" "imovx")
189
            (and (eq_attr "memory" "!none")
190
                 (ior (and (match_operand:HI 0 "register_operand")
191
                           (match_operand:HI 1 "general_operand"))
192
                      (and (match_operand:SI 0 "register_operand")
193
                           (match_operand:SI 1 "general_operand"))))))
194
  "atom-simple-either")
195
196
;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
197
(define_insn_reservation  "atom_imovx_2" 1
198
  (and (eq_attr "cpu" "atom")
199
       (and (eq_attr "type" "imovx")
200
            (and (eq_attr "memory" "none")
201
                 (ior (match_operand:QI 0 "register_operand")
202
                      (ior (and (match_operand:SI 0 "register_operand")
203
                                (not (match_operand:SI 1 "general_operand")))
204
                           (match_operand:DI 0 "register_operand"))))))
205
  "atom-simple-0")
206
207
;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
208
(define_insn_reservation  "atom_imovx_2_mem" 1
209
  (and (eq_attr "cpu" "atom")
210
       (and (eq_attr "type" "imovx")
211
            (and (eq_attr "memory" "!none")
212
                 (ior (match_operand:QI 0 "register_operand")
213
                      (ior (and (match_operand:SI 0 "register_operand")
214
                                (not (match_operand:SI 1 "general_operand")))
215
                           (match_operand:DI 0 "register_operand"))))))
216
  "atom-simple-0")
217
218
;; 16<-8
219
(define_insn_reservation  "atom_imovx_3" 3
220
  (and (eq_attr "cpu" "atom")
221
       (and (eq_attr "type" "imovx")
222
            (and (match_operand:HI 0 "register_operand")
223
                 (match_operand:QI 1 "general_operand"))))
224
  "atom-complex, atom-all-eu*2")
225
226
(define_insn_reservation  "atom_lea" 1
227
  (and (eq_attr "cpu" "atom")
228
       (and (eq_attr "type" "lea")
229
            (eq_attr "mode" "!HI")))
230
  "atom-simple-either")
231
232
;; lea 16bit address is complex insn
233
(define_insn_reservation  "atom_lea_2" 2
234
  (and (eq_attr "cpu" "atom")
235
       (and (eq_attr "type" "lea")
236
            (eq_attr "mode" "HI")))
237
  "atom-complex, atom-all-eu")
238
239
(define_insn_reservation  "atom_incdec" 1
240
  (and (eq_attr "cpu" "atom")
241
       (and (eq_attr "type" "incdec")
242
            (eq_attr "memory" "none")))
243
  "atom-simple-either")
244
245
(define_insn_reservation  "atom_incdec_mem" 1
246
  (and (eq_attr "cpu" "atom")
247
       (and (eq_attr "type" "incdec")
248
            (eq_attr "memory" "!none")))
249
  "atom-simple-either")
250
251
;; simple shift instructions use SHIFT eu, no memory operand
252
(define_insn_reservation  "atom_ishift" 1
253
  (and (eq_attr "cpu" "atom")
254
       (and (eq_attr "type" "ishift")
255
            (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
256
  "atom-simple-0")
257
258
;; simple shift instruction use SHIFT eu, memory
259
(define_insn_reservation  "atom_ishift_mem" 1
260
  (and (eq_attr "cpu" "atom")
261
       (and (eq_attr "type" "ishift")
262
            (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
263
  "atom-simple-0")
264
265
;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles
266
(define_insn_reservation  "atom_ishift_3" 7
267
  (and (eq_attr "cpu" "atom")
268
       (and (eq_attr "type" "ishift")
269
            (eq_attr "prefix_0f" "1")))
270
  "atom-complex, atom-all-eu*6")
271
272
(define_insn_reservation  "atom_ishift1" 1
273
  (and (eq_attr "cpu" "atom")
274
       (and (eq_attr "type" "ishift1")
275
            (eq_attr "memory" "none")))
276
  "atom-simple-0")
277
278
(define_insn_reservation  "atom_ishift1_mem" 1
279
  (and (eq_attr "cpu" "atom")
280
       (and (eq_attr "type" "ishift1")
281
            (eq_attr "memory" "!none")))
282
  "atom-simple-0")
283
284
(define_insn_reservation  "atom_rotate" 1
285
  (and (eq_attr "cpu" "atom")
286
       (and (eq_attr "type" "rotate")
287
            (eq_attr "memory" "none")))
288
  "atom-simple-0")
289
290
(define_insn_reservation  "atom_rotate_mem" 1
291
  (and (eq_attr "cpu" "atom")
292
       (and (eq_attr "type" "rotate")
293
            (eq_attr "memory" "!none")))
294
  "atom-simple-0")
295
296
(define_insn_reservation  "atom_rotate1" 1
297
  (and (eq_attr "cpu" "atom")
298
       (and (eq_attr "type" "rotate1")
299
            (eq_attr "memory" "none")))
300
  "atom-simple-0")
301
302
(define_insn_reservation  "atom_rotate1_mem" 1
303
  (and (eq_attr "cpu" "atom")
304
       (and (eq_attr "type" "rotate1")
305
            (eq_attr "memory" "!none")))
306
  "atom-simple-0")
307
308
(define_insn_reservation  "atom_imul" 5
309
  (and (eq_attr "cpu" "atom")
310
       (and (eq_attr "type" "imul")
311
            (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
312
  "atom-imul-32")
313
314
(define_insn_reservation  "atom_imul_mem" 5
315
  (and (eq_attr "cpu" "atom")
316
       (and (eq_attr "type" "imul")
317
            (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
318
  "atom-imul-32")
319
320
;; latency set to 10 as common 64x64 imul
321
(define_insn_reservation  "atom_imul_3" 10
322
  (and (eq_attr "cpu" "atom")
323
       (and (eq_attr "type" "imul")
324
            (eq_attr "mode" "!SI")))
325
  "atom-complex, atom-all-eu*9")
326
327
(define_insn_reservation  "atom_idiv" 65
328
  (and (eq_attr "cpu" "atom")
329
       (eq_attr "type" "idiv"))
330
  "atom-complex, atom-all-eu*32, nothing*32")
331
332
(define_insn_reservation  "atom_icmp" 1
333
  (and (eq_attr "cpu" "atom")
334
       (and (eq_attr "type" "icmp")
335
            (eq_attr "memory" "none")))
336
  "atom-simple-either")
337
338
(define_insn_reservation  "atom_icmp_mem" 1
339
  (and (eq_attr "cpu" "atom")
340
       (and (eq_attr "type" "icmp")
341
            (eq_attr "memory" "!none")))
342
  "atom-simple-either")
343
344
(define_insn_reservation  "atom_test" 1
345
  (and (eq_attr "cpu" "atom")
346
       (and (eq_attr "type" "test")
347
            (eq_attr "memory" "none")))
348
  "atom-simple-either")
349
350
(define_insn_reservation  "atom_test_mem" 1
351
  (and (eq_attr "cpu" "atom")
352
       (and (eq_attr "type" "test")
353
            (eq_attr "memory" "!none")))
354
  "atom-simple-either")
355
356
(define_insn_reservation  "atom_ibr" 1
357
  (and (eq_attr "cpu" "atom")
358
       (and (eq_attr "type" "ibr")
359
            (eq_attr "memory" "!load")))
360
  "atom-simple-1")
361
362
;; complex if jump target is from address
363
(define_insn_reservation  "atom_ibr_2" 2
364
  (and (eq_attr "cpu" "atom")
365
       (and (eq_attr "type" "ibr")
366
            (eq_attr "memory" "load")))
367
  "atom-complex, atom-all-eu")
368
369
(define_insn_reservation  "atom_setcc" 1
370
  (and (eq_attr "cpu" "atom")
371
       (and (eq_attr "type" "setcc")
372
            (eq_attr "memory" "!store")))
373
  "atom-simple-either")
374
375
;; 2 cycles complex if target is in memory
376
(define_insn_reservation  "atom_setcc_2" 2
377
  (and (eq_attr "cpu" "atom")
378
       (and (eq_attr "type" "setcc")
379
            (eq_attr "memory" "store")))
380
  "atom-complex, atom-all-eu")
381
382
(define_insn_reservation  "atom_icmov" 1
383
  (and (eq_attr "cpu" "atom")
384
       (and (eq_attr "type" "icmov")
385
            (eq_attr "memory" "none")))
386
  "atom-simple-either")
387
388
(define_insn_reservation  "atom_icmov_mem" 1
389
  (and (eq_attr "cpu" "atom")
390
       (and (eq_attr "type" "icmov")
391
            (eq_attr "memory" "!none")))
392
  "atom-simple-either")
393
394
;; UCODE if segreg, ignored
395
(define_insn_reservation  "atom_push" 2
396
  (and (eq_attr "cpu" "atom")
397
       (eq_attr "type" "push"))
398
  "atom-dual-2c")
399
400
;; pop r64 is 1 cycle. UCODE if segreg, ignored
401
(define_insn_reservation  "atom_pop" 1
402
  (and (eq_attr "cpu" "atom")
403
       (and (eq_attr "type" "pop")
404
            (eq_attr "mode" "DI")))
405
  "atom-dual-1c")
406
407
;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
408
(define_insn_reservation  "atom_pop_2" 2
409
  (and (eq_attr "cpu" "atom")
410
       (and (eq_attr "type" "pop")
411
            (eq_attr "mode" "!DI")))
412
  "atom-dual-2c")
413
414
;; UCODE if segreg, ignored
415
(define_insn_reservation  "atom_call" 1
416
  (and (eq_attr "cpu" "atom")
417
       (eq_attr "type" "call"))
418
  "atom-dual-1c")
419
420
(define_insn_reservation  "atom_callv" 1
421
  (and (eq_attr "cpu" "atom")
422
       (eq_attr "type" "callv"))
423
  "atom-dual-1c")
424
425
(define_insn_reservation  "atom_leave" 3
426
  (and (eq_attr "cpu" "atom")
427
       (eq_attr "type" "leave"))
428
  "atom-complex, atom-all-eu*2")
429
430
(define_insn_reservation  "atom_str" 3
431
  (and (eq_attr "cpu" "atom")
432
       (eq_attr "type" "str"))
433
  "atom-complex, atom-all-eu*2")
434
435
(define_insn_reservation  "atom_sselog" 1
436
  (and (eq_attr "cpu" "atom")
437
       (and (eq_attr "type" "sselog")
438
            (eq_attr "memory" "none")))
439
  "atom-simple-either")
440
441
(define_insn_reservation  "atom_sselog_mem" 1
442
  (and (eq_attr "cpu" "atom")
443
       (and (eq_attr "type" "sselog")
444
            (eq_attr "memory" "!none")))
445
  "atom-simple-either")
446
447
(define_insn_reservation  "atom_sselog1" 1
448
  (and (eq_attr "cpu" "atom")
449
       (and (eq_attr "type" "sselog1")
450
            (eq_attr "memory" "none")))
451
  "atom-simple-0")
452
453
(define_insn_reservation  "atom_sselog1_mem" 1
454
  (and (eq_attr "cpu" "atom")
455
       (and (eq_attr "type" "sselog1")
456
            (eq_attr "memory" "!none")))
457
  "atom-simple-0")
458
459
;; not pmad, not psad
460
(define_insn_reservation  "atom_sseiadd" 1
461
  (and (eq_attr "cpu" "atom")
462
       (and (eq_attr "type" "sseiadd")
463
            (and (not (match_operand:V2DI 0 "register_operand"))
464
                 (and (eq_attr "atom_unit" "!simul")
465
                      (eq_attr "atom_unit" "!complex")))))
466
  "atom-simple-either")
467
468
;; pmad, psad and 64
469
(define_insn_reservation  "atom_sseiadd_2" 4
470
  (and (eq_attr "cpu" "atom")
471
       (and (eq_attr "type" "sseiadd")
472
            (and (not (match_operand:V2DI 0 "register_operand"))
473
                 (and (eq_attr "atom_unit" "simul" )
474
                      (eq_attr "mode" "DI")))))
475
  "atom-fmul-4c")
476
477
;; pmad, psad and 128
478
(define_insn_reservation  "atom_sseiadd_3" 5
479
  (and (eq_attr "cpu" "atom")
480
       (and (eq_attr "type" "sseiadd")
481
            (and (not (match_operand:V2DI 0 "register_operand"))
482
                 (and (eq_attr "atom_unit" "simul" )
483
                      (eq_attr "mode" "TI")))))
484
  "atom-fmul-5c")
485
486
;; if paddq(64 bit op), phadd/phsub
487
(define_insn_reservation  "atom_sseiadd_4" 6
488
  (and (eq_attr "cpu" "atom")
489
       (and (eq_attr "type" "sseiadd")
490
            (ior (match_operand:V2DI 0 "register_operand")
491
                 (eq_attr "atom_unit" "complex"))))
492
  "atom-complex, atom-all-eu*5")
493
494
;; if immediate op. 
495
(define_insn_reservation  "atom_sseishft" 1
496
  (and (eq_attr "cpu" "atom")
497
       (and (eq_attr "type" "sseishft")
498
            (and (eq_attr "atom_unit" "!sishuf")
499
                 (match_operand 2 "immediate_operand"))))
500
  "atom-simple-either")
501
502
;; if palignr or psrldq
503
(define_insn_reservation  "atom_sseishft_2" 1
504
  (and (eq_attr "cpu" "atom")
505
       (and (eq_attr "type" "sseishft")
506
            (and (eq_attr "atom_unit" "sishuf")
507
                 (match_operand 2 "immediate_operand"))))
508
  "atom-simple-0")
509
510
;; if reg/mem op
511
(define_insn_reservation  "atom_sseishft_3" 2
512
  (and (eq_attr "cpu" "atom")
513
       (and (eq_attr "type" "sseishft")
514
            (not (match_operand 2 "immediate_operand"))))
515
  "atom-complex, atom-all-eu")
516
517
(define_insn_reservation  "atom_sseimul" 1
518
  (and (eq_attr "cpu" "atom")
519
       (eq_attr "type" "sseimul"))
520
  "atom-simple-0")
521
522
;; rcpss or rsqrtss
523
(define_insn_reservation  "atom_sse" 4
524
  (and (eq_attr "cpu" "atom")
525
       (and (eq_attr "type" "sse")
526
            (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
527
  "atom-fmul-4c")
528
529
;; movshdup, movsldup. Suggest to type sseishft
530
(define_insn_reservation  "atom_sse_2" 1
531
  (and (eq_attr "cpu" "atom")
532
       (and (eq_attr "type" "sse")
533
            (eq_attr "atom_sse_attr" "movdup")))
534
  "atom-simple-0")
535
536
;; lfence
537
(define_insn_reservation  "atom_sse_3" 1
538
  (and (eq_attr "cpu" "atom")
539
       (and (eq_attr "type" "sse")
540
            (eq_attr "atom_sse_attr" "lfence")))
541
  "atom-simple-either")
542
543
;; sfence,clflush,mfence, prefetch
544
(define_insn_reservation  "atom_sse_4" 1
545
  (and (eq_attr "cpu" "atom")
546
       (and (eq_attr "type" "sse")
547
            (ior (eq_attr "atom_sse_attr" "fence")
548
                 (eq_attr "atom_sse_attr" "prefetch"))))
549
  "atom-simple-0")
550
551
;; rcpps, rsqrtps, sqrt, ldmxcsr, stmxcsr
552
(define_insn_reservation  "atom_sse_5" 7
553
  (and (eq_attr "cpu" "atom")
554
       (and (eq_attr "type" "sse")
555
            (ior (ior (eq_attr "atom_sse_attr" "sqrt")
556
                      (eq_attr "atom_sse_attr" "mxcsr"))
557
                 (and (eq_attr "atom_sse_attr" "rcp")
558
                      (eq_attr "mode" "V4SF")))))
559
  "atom-complex, atom-all-eu*6")
560
561
;; xmm->xmm
562
(define_insn_reservation  "atom_ssemov" 1
563
  (and (eq_attr "cpu" "atom")
564
       (and (eq_attr "type" "ssemov")
565
            (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy"))))
566
  "atom-simple-either")
567
568
;; reg->xmm
569
(define_insn_reservation  "atom_ssemov_2" 1
570
  (and (eq_attr "cpu" "atom")
571
       (and (eq_attr "type" "ssemov")
572
            (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r"))))
573
  "atom-simple-0")
574
575
;; xmm->reg
576
(define_insn_reservation  "atom_ssemov_3" 3
577
  (and (eq_attr "cpu" "atom")
578
       (and (eq_attr "type" "ssemov")
579
            (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy"))))
580
  "atom-eu-0-3-1")
581
582
;; mov mem
583
(define_insn_reservation  "atom_ssemov_4" 1
584
  (and (eq_attr "cpu" "atom")
585
       (and (eq_attr "type" "ssemov")
586
            (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
587
  "atom-simple-0")
588
589
;; movu mem
590
(define_insn_reservation  "atom_ssemov_5" 2
591
  (and (eq_attr "cpu" "atom")
592
       (and (eq_attr "type" "ssemov")
593
            (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
594
  "atom-complex, atom-all-eu")
595
596
;; no memory simple
597
(define_insn_reservation  "atom_sseadd" 5
598
  (and (eq_attr "cpu" "atom")
599
       (and (eq_attr "type" "sseadd")
600
            (and (eq_attr "memory" "none")
601
                 (and (eq_attr "mode" "!V2DF")
602
                      (eq_attr "atom_unit" "!complex")))))
603
  "atom-fadd-5c")
604
605
;; memory simple
606
(define_insn_reservation  "atom_sseadd_mem" 5
607
  (and (eq_attr "cpu" "atom")
608
       (and (eq_attr "type" "sseadd")
609
            (and (eq_attr "memory" "!none")
610
                 (and (eq_attr "mode" "!V2DF")
611
                      (eq_attr "atom_unit" "!complex")))))
612
  "atom-dual-5c")
613
614
;; maxps, minps, *pd, hadd, hsub
615
(define_insn_reservation  "atom_sseadd_3" 8
616
  (and (eq_attr "cpu" "atom")
617
       (and (eq_attr "type" "sseadd")
618
            (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
619
  "atom-complex, atom-all-eu*7")
620
621
;; Except dppd/dpps
622
(define_insn_reservation  "atom_ssemul" 5
623
  (and (eq_attr "cpu" "atom")
624
       (and (eq_attr "type" "ssemul")
625
            (eq_attr "mode" "!SF")))
626
  "atom-fmul-5c")
627
628
;; Except dppd/dpps, 4 cycle if mulss
629
(define_insn_reservation  "atom_ssemul_2" 4
630
  (and (eq_attr "cpu" "atom")
631
       (and (eq_attr "type" "ssemul")
632
            (eq_attr "mode" "SF")))
633
  "atom-fmul-4c")
634
635
(define_insn_reservation  "atom_ssecmp" 1
636
  (and (eq_attr "cpu" "atom")
637
       (eq_attr "type" "ssecmp"))
638
  "atom-simple-either")
639
640
(define_insn_reservation  "atom_ssecomi" 10
641
  (and (eq_attr "cpu" "atom")
642
       (eq_attr "type" "ssecomi"))
643
  "atom-complex, atom-all-eu*9")
644
645
;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
646
(define_insn_reservation  "atom_ssecvt" 5
647
  (and (eq_attr "cpu" "atom")
648
       (and (eq_attr "type" "ssecvt")
649
            (ior (and (match_operand:V2SI 0 "register_operand")
650
                      (match_operand:V4SF 1 "register_operand"))
651
                 (and (match_operand:V4SF 0 "register_operand")
652
                      (match_operand:V2SI 1 "register_operand")))))
653
  "atom-fadd-5c")
654
655
;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
656
(define_insn_reservation  "atom_ssecvt_2" 5
657
  (and (eq_attr "cpu" "atom")
658
       (and (eq_attr "type" "ssecvt")
659
            (ior (and (match_operand:V2SI 0 "register_operand")
660
                      (match_operand:V4SF 1 "memory_operand"))
661
                 (and (match_operand:V4SF 0 "register_operand")
662
                      (match_operand:V2SI 1 "memory_operand")))))
663
  "atom-dual-5c")
664
665
;; otherwise. 7 cycles average for cvtss2sd
666
(define_insn_reservation  "atom_ssecvt_3" 7
667
  (and (eq_attr "cpu" "atom")
668
       (and (eq_attr "type" "ssecvt")
669
            (not (ior (and (match_operand:V2SI 0 "register_operand")
670
                           (match_operand:V4SF 1 "nonimmediate_operand"))
671
                      (and (match_operand:V4SF 0 "register_operand")
672
                           (match_operand:V2SI 1 "nonimmediate_operand"))))))
673
  "atom-complex, atom-all-eu*6")
674
675
;; memory and cvtsi2sd
676
(define_insn_reservation  "atom_sseicvt" 5
677
  (and (eq_attr "cpu" "atom")
678
       (and (eq_attr "type" "sseicvt")
679
            (and (match_operand:V2DF 0 "register_operand")
680
                 (match_operand:SI 1 "memory_operand"))))
681
  "atom-dual-5c")
682
683
;; otherwise. 8 cycles average for cvtsd2si
684
(define_insn_reservation  "atom_sseicvt_2" 8
685
  (and (eq_attr "cpu" "atom")
686
       (and (eq_attr "type" "sseicvt")
687
            (not (and (match_operand:V2DF 0 "register_operand")
688
                      (match_operand:SI 1 "memory_operand")))))
689
  "atom-complex, atom-all-eu*7")
690
691
(define_insn_reservation  "atom_ssediv" 62
692
  (and (eq_attr "cpu" "atom")
693
       (eq_attr "type" "ssediv"))
694
  "atom-complex, atom-all-eu*12, nothing*49")
695
696
;; simple for fmov
697
(define_insn_reservation  "atom_fmov" 1
698
  (and (eq_attr "cpu" "atom")
699
       (and (eq_attr "type" "fmov")
700
            (eq_attr "memory" "none")))
701
  "atom-simple-either")
702
703
;; simple for fmov with a memory operand
704
(define_insn_reservation  "atom_fmov_mem" 1
705
  (and (eq_attr "cpu" "atom")
706
       (and (eq_attr "type" "fmov")
707
            (eq_attr "memory" "!none")))
708
  "atom-simple-either")
709
710
;; Define bypass here
711
712
;; There will be no stall from lea to non-mem EX insns
713
(define_bypass 0 "atom_lea"
714
                 "atom_alu_carry,
715
                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
716
                  atom_incdec, atom_setcc, atom_icmov, atom_pop")
717
718
(define_bypass 0 "atom_lea"
719
                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
720
                  atom_imovx_mem, atom_imovx_2_mem,
721
                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
722
                 "!ix86_agi_dependent")
723
724
;; There will be a 3-cycle stall from EX insns to AGEN insns (LEA)
725
(define_bypass 4 "atom_alu_carry,
726
                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
727
                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
728
                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
729
                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
730
                  atom_imovx_mem, atom_imovx_2_mem,
731
                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
732
                 "atom_lea")
733
734
;; There will be a 3-cycle stall from EX insns to insns that need address calculation
735
(define_bypass 4 "atom_alu_carry,
736
                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
737
                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
738
                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
739
                  atom_imovx_mem, atom_imovx_2_mem,
740
                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
741
                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
742
                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
743
                  atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
744
                  atom_imovx_mem, atom_imovx_2_mem,
745
                  atom_imul_mem, atom_icmp_mem,
746
                  atom_test_mem, atom_icmov_mem, atom_sselog_mem,
747
                  atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
748
                  atom_ishift_mem, atom_ishift1_mem, 
749
                  atom_rotate_mem, atom_rotate1_mem"
750
                  "ix86_agi_dependent")
751
752
;; Stall from imul to lea is 8 cycles.
753
(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")
754
755
;; Stall from imul to memory address is 8 cycles.
756
(define_bypass 9 "atom_imul, atom_imul_mem" 
757
                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
758
                  atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
759
                  atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
760
                  atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
761
                  atom_test_mem, atom_icmov_mem, atom_sselog_mem,
762
                  atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
763
                  "ix86_agi_dependent")
764
765
;; There will be 0 cycle stall from cmp/test to jcc
766
767
;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
768
(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
769
                  atom_alu1, atom_negnot, atom_incdec, atom_ishift,
770
                  atom_ishift1, atom_rotate, atom_rotate1"
771
                 "atom_icmov, atom_alu_carry")
772
773
;; lea to shift count stall is 2 cycles
774
(define_bypass 3 "atom_lea"
775
                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
776
                  atom_ishift_mem, atom_ishift1_mem, 
777
                  atom_rotate_mem, atom_rotate1_mem"
778
                 "ix86_dep_by_shift_count")
779
780
;; lea to shift source stall is 1 cycle
781
(define_bypass 2 "atom_lea"
782
                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
783
                 "!ix86_dep_by_shift_count")
784
785
;; non-lea to shift count stall is 1 cycle
786
(define_bypass 2 "atom_alu_carry,
787
                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
788
                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
789
                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
790
                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
791
                  atom_imovx_mem, atom_imovx_2_mem,
792
                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
793
                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
794
                  atom_ishift_mem, atom_ishift1_mem, 
795
                  atom_rotate_mem, atom_rotate1_mem"
796
                 "ix86_dep_by_shift_count")
(-)gcc/config/i386/sse.md (-9 / +47 lines)
Lines 338-343 Link Here
338
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
338
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
339
  "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
339
  "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
340
  [(set_attr "type" "ssemov")
340
  [(set_attr "type" "ssemov")
341
   (set_attr "movu" "1")
341
   (set_attr "prefix" "vex")
342
   (set_attr "prefix" "vex")
342
   (set_attr "mode" "<MODE>")])
343
   (set_attr "mode" "<MODE>")])
343
344
Lines 363-368 Link Here
363
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
364
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
364
  "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
365
  "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
365
  [(set_attr "type" "ssemov")
366
  [(set_attr "type" "ssemov")
367
   (set_attr "movu" "1")
366
   (set_attr "mode" "<MODE>")])
368
   (set_attr "mode" "<MODE>")])
367
369
368
(define_insn "avx_movdqu<avxmodesuffix>"
370
(define_insn "avx_movdqu<avxmodesuffix>"
Lines 373-378 Link Here
373
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
375
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
374
  "vmovdqu\t{%1, %0|%0, %1}"
376
  "vmovdqu\t{%1, %0|%0, %1}"
375
  [(set_attr "type" "ssemov")
377
  [(set_attr "type" "ssemov")
378
   (set_attr "movu" "1")
376
   (set_attr "prefix" "vex")
379
   (set_attr "prefix" "vex")
377
   (set_attr "mode" "<avxvecmode>")])
380
   (set_attr "mode" "<avxvecmode>")])
378
381
Lines 383-388 Link Here
383
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
386
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
384
  "movdqu\t{%1, %0|%0, %1}"
387
  "movdqu\t{%1, %0|%0, %1}"
385
  [(set_attr "type" "ssemov")
388
  [(set_attr "type" "ssemov")
389
   (set_attr "movu" "1")
386
   (set_attr "prefix_data16" "1")
390
   (set_attr "prefix_data16" "1")
387
   (set_attr "mode" "TI")])
391
   (set_attr "mode" "TI")])
388
392
Lines 424-430 Link Here
424
		     UNSPEC_MOVNT))]
428
		     UNSPEC_MOVNT))]
425
  "TARGET_SSE2"
429
  "TARGET_SSE2"
426
  "movntdq\t{%1, %0|%0, %1}"
430
  "movntdq\t{%1, %0|%0, %1}"
427
  [(set_attr "type" "ssecvt")
431
  [(set_attr "type" "ssemov")
428
   (set_attr "prefix_data16" "1")
432
   (set_attr "prefix_data16" "1")
429
   (set_attr "mode" "TI")])
433
   (set_attr "mode" "TI")])
430
434
Lines 434-440 Link Here
434
		   UNSPEC_MOVNT))]
438
		   UNSPEC_MOVNT))]
435
  "TARGET_SSE2"
439
  "TARGET_SSE2"
436
  "movnti\t{%1, %0|%0, %1}"
440
  "movnti\t{%1, %0|%0, %1}"
437
  [(set_attr "type" "ssecvt")
441
  [(set_attr "type" "ssemov")
438
   (set_attr "mode" "V2DF")])
442
   (set_attr "mode" "V2DF")])
439
443
440
(define_insn "avx_lddqu<avxmodesuffix>"
444
(define_insn "avx_lddqu<avxmodesuffix>"
Lines 445-450 Link Here
445
  "TARGET_AVX"
449
  "TARGET_AVX"
446
  "vlddqu\t{%1, %0|%0, %1}"
450
  "vlddqu\t{%1, %0|%0, %1}"
447
  [(set_attr "type" "ssecvt")
451
  [(set_attr "type" "ssecvt")
452
   (set_attr "movu" "1")
448
   (set_attr "prefix" "vex")
453
   (set_attr "prefix" "vex")
449
   (set_attr "mode" "<avxvecmode>")])
454
   (set_attr "mode" "<avxvecmode>")])
450
455
Lines 454-460 Link Here
454
		      UNSPEC_LDDQU))]
459
		      UNSPEC_LDDQU))]
455
  "TARGET_SSE3"
460
  "TARGET_SSE3"
456
  "lddqu\t{%1, %0|%0, %1}"
461
  "lddqu\t{%1, %0|%0, %1}"
457
  [(set_attr "type" "ssecvt")
462
  [(set_attr "type" "ssemov")
463
   (set_attr "movu" "1")
458
   (set_attr "prefix_rep" "1")
464
   (set_attr "prefix_rep" "1")
459
   (set_attr "mode" "TI")])
465
   (set_attr "mode" "TI")])
460
466
Lines 761-766 Link Here
761
  "TARGET_SSE"
767
  "TARGET_SSE"
762
  "%vrcpps\t{%1, %0|%0, %1}"
768
  "%vrcpps\t{%1, %0|%0, %1}"
763
  [(set_attr "type" "sse")
769
  [(set_attr "type" "sse")
770
   (set_attr "atom_sse_attr" "rcp")
764
   (set_attr "prefix" "maybe_vex")
771
   (set_attr "prefix" "maybe_vex")
765
   (set_attr "mode" "V4SF")])
772
   (set_attr "mode" "V4SF")])
766
773
Lines 787-792 Link Here
787
  "TARGET_SSE"
794
  "TARGET_SSE"
788
  "rcpss\t{%1, %0|%0, %1}"
795
  "rcpss\t{%1, %0|%0, %1}"
789
  [(set_attr "type" "sse")
796
  [(set_attr "type" "sse")
797
   (set_attr "atom_sse_attr" "rcp")
790
   (set_attr "mode" "SF")])
798
   (set_attr "mode" "SF")])
791
799
792
(define_expand "sqrtv8sf2"
800
(define_expand "sqrtv8sf2"
Lines 832-837 Link Here
832
  "TARGET_SSE"
840
  "TARGET_SSE"
833
  "%vsqrtps\t{%1, %0|%0, %1}"
841
  "%vsqrtps\t{%1, %0|%0, %1}"
834
  [(set_attr "type" "sse")
842
  [(set_attr "type" "sse")
843
   (set_attr "atom_sse_attr" "sqrt")
835
   (set_attr "prefix" "maybe_vex")
844
   (set_attr "prefix" "maybe_vex")
836
   (set_attr "mode" "V4SF")])
845
   (set_attr "mode" "V4SF")])
837
846
Lines 876-881 Link Here
876
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
885
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
877
  "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
886
  "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
878
  [(set_attr "type" "sse")
887
  [(set_attr "type" "sse")
888
   (set_attr "atom_sse_attr" "sqrt")
879
   (set_attr "mode" "<ssescalarmode>")])
889
   (set_attr "mode" "<ssescalarmode>")])
880
890
881
(define_expand "rsqrtv8sf2"
891
(define_expand "rsqrtv8sf2"
Lines 1039-1045 Link Here
1039
	 (const_int 1)))]
1049
	 (const_int 1)))]
1040
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1050
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1041
  "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1051
  "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1042
  [(set_attr "type" "sse")
1052
  [(set_attr "type" "sseadd")
1043
   (set_attr "mode" "<ssescalarmode>")])
1053
   (set_attr "mode" "<ssescalarmode>")])
1044
1054
1045
;; These versions of the min/max patterns implement exactly the operations
1055
;; These versions of the min/max patterns implement exactly the operations
Lines 1175-1180 Link Here
1175
  "TARGET_SSE3"
1185
  "TARGET_SSE3"
1176
  "addsubpd\t{%2, %0|%0, %2}"
1186
  "addsubpd\t{%2, %0|%0, %2}"
1177
  [(set_attr "type" "sseadd")
1187
  [(set_attr "type" "sseadd")
1188
   (set_attr "atom_unit" "complex")
1178
   (set_attr "mode" "V2DF")])
1189
   (set_attr "mode" "V2DF")])
1179
1190
1180
(define_insn "avx_h<plusminus_insn>v4df3"
1191
(define_insn "avx_h<plusminus_insn>v4df3"
Lines 1298-1303 Link Here
1298
  "TARGET_SSE3"
1309
  "TARGET_SSE3"
1299
  "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1310
  "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1300
  [(set_attr "type" "sseadd")
1311
  [(set_attr "type" "sseadd")
1312
   (set_attr "atom_unit" "complex")
1301
   (set_attr "prefix_rep" "1")
1313
   (set_attr "prefix_rep" "1")
1302
   (set_attr "mode" "V4SF")])
1314
   (set_attr "mode" "V4SF")])
1303
1315
Lines 5066-5071 Link Here
5066
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5078
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5067
  "pmaddwd\t{%2, %0|%0, %2}"
5079
  "pmaddwd\t{%2, %0|%0, %2}"
5068
  [(set_attr "type" "sseiadd")
5080
  [(set_attr "type" "sseiadd")
5081
   (set_attr "atom_unit" "simul")
5069
   (set_attr "prefix_data16" "1")
5082
   (set_attr "prefix_data16" "1")
5070
   (set_attr "mode" "TI")])
5083
   (set_attr "mode" "TI")])
5071
5084
Lines 7025-7030 Link Here
7025
   movq\t{%H1, %0|%0, %H1}
7038
   movq\t{%H1, %0|%0, %H1}
7026
   mov{q}\t{%H1, %0|%0, %H1}"
7039
   mov{q}\t{%H1, %0|%0, %H1}"
7027
  [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7040
  [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7041
   (set_attr "atom_unit" "*,sishuf,*,*")
7028
   (set_attr "memory" "*,none,*,*")
7042
   (set_attr "memory" "*,none,*,*")
7029
   (set_attr "mode" "V2SF,TI,TI,DI")])
7043
   (set_attr "mode" "V2SF,TI,TI,DI")])
7030
7044
Lines 7057-7062 Link Here
7057
   psrldq\t{$8, %0|%0, 8}
7071
   psrldq\t{$8, %0|%0, 8}
7058
   movq\t{%H1, %0|%0, %H1}"
7072
   movq\t{%H1, %0|%0, %H1}"
7059
  [(set_attr "type" "ssemov,sseishft,ssemov")
7073
  [(set_attr "type" "ssemov,sseishft,ssemov")
7074
   (set_attr "atom_unit" "*,sishuf,*")
7060
   (set_attr "memory" "*,none,*")
7075
   (set_attr "memory" "*,none,*")
7061
   (set_attr "mode" "V2SF,TI,TI")])
7076
   (set_attr "mode" "V2SF,TI,TI")])
7062
7077
Lines 7614-7619 Link Here
7614
  "TARGET_SSE2"
7629
  "TARGET_SSE2"
7615
  "psadbw\t{%2, %0|%0, %2}"
7630
  "psadbw\t{%2, %0|%0, %2}"
7616
  [(set_attr "type" "sseiadd")
7631
  [(set_attr "type" "sseiadd")
7632
   (set_attr "atom_unit" "simul")
7617
   (set_attr "prefix_data16" "1")
7633
   (set_attr "prefix_data16" "1")
7618
   (set_attr "mode" "TI")])
7634
   (set_attr "mode" "TI")])
7619
7635
Lines 7635-7641 Link Here
7635
	  UNSPEC_MOVMSK))]
7651
	  UNSPEC_MOVMSK))]
7636
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7652
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7637
  "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7653
  "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7638
  [(set_attr "type" "ssecvt")
7654
  [(set_attr "type" "ssemov")
7639
   (set_attr "prefix" "maybe_vex")
7655
   (set_attr "prefix" "maybe_vex")
7640
   (set_attr "mode" "<MODE>")])
7656
   (set_attr "mode" "<MODE>")])
7641
7657
Lines 7645-7651 Link Here
7645
		   UNSPEC_MOVMSK))]
7661
		   UNSPEC_MOVMSK))]
7646
  "TARGET_SSE2"
7662
  "TARGET_SSE2"
7647
  "%vpmovmskb\t{%1, %0|%0, %1}"
7663
  "%vpmovmskb\t{%1, %0|%0, %1}"
7648
  [(set_attr "type" "ssecvt")
7664
  [(set_attr "type" "ssemov")
7649
   (set_attr "prefix_data16" "1")
7665
   (set_attr "prefix_data16" "1")
7650
   (set_attr "prefix" "maybe_vex")
7666
   (set_attr "prefix" "maybe_vex")
7651
   (set_attr "mode" "SI")])
7667
   (set_attr "mode" "SI")])
Lines 7668-7674 Link Here
7668
  "TARGET_SSE2 && !TARGET_64BIT"
7684
  "TARGET_SSE2 && !TARGET_64BIT"
7669
  ;; @@@ check ordering of operands in intel/nonintel syntax
7685
  ;; @@@ check ordering of operands in intel/nonintel syntax
7670
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
7686
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
7671
  [(set_attr "type" "ssecvt")
7687
  [(set_attr "type" "ssemov")
7672
   (set_attr "prefix_data16" "1")
7688
   (set_attr "prefix_data16" "1")
7673
   (set_attr "prefix" "maybe_vex")
7689
   (set_attr "prefix" "maybe_vex")
7674
   (set_attr "mode" "TI")])
7690
   (set_attr "mode" "TI")])
Lines 7682-7688 Link Here
7682
  "TARGET_SSE2 && TARGET_64BIT"
7698
  "TARGET_SSE2 && TARGET_64BIT"
7683
  ;; @@@ check ordering of operands in intel/nonintel syntax
7699
  ;; @@@ check ordering of operands in intel/nonintel syntax
7684
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
7700
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
7685
  [(set_attr "type" "ssecvt")
7701
  [(set_attr "type" "ssemov")
7686
   (set_attr "prefix_data16" "1")
7702
   (set_attr "prefix_data16" "1")
7687
   (set_attr "prefix" "maybe_vex")
7703
   (set_attr "prefix" "maybe_vex")
7688
   (set_attr "mode" "TI")])
7704
   (set_attr "mode" "TI")])
Lines 7693-7698 Link Here
7693
  "TARGET_SSE"
7709
  "TARGET_SSE"
7694
  "%vldmxcsr\t%0"
7710
  "%vldmxcsr\t%0"
7695
  [(set_attr "type" "sse")
7711
  [(set_attr "type" "sse")
7712
   (set_attr "atom_sse_attr" "mxcsr")
7696
   (set_attr "prefix" "maybe_vex")
7713
   (set_attr "prefix" "maybe_vex")
7697
   (set_attr "memory" "load")])
7714
   (set_attr "memory" "load")])
7698
7715
Lines 7702-7707 Link Here
7702
  "TARGET_SSE"
7719
  "TARGET_SSE"
7703
  "%vstmxcsr\t%0"
7720
  "%vstmxcsr\t%0"
7704
  [(set_attr "type" "sse")
7721
  [(set_attr "type" "sse")
7722
   (set_attr "atom_sse_attr" "mxcsr")
7705
   (set_attr "prefix" "maybe_vex")
7723
   (set_attr "prefix" "maybe_vex")
7706
   (set_attr "memory" "store")])
7724
   (set_attr "memory" "store")])
7707
7725
Lines 7720-7725 Link Here
7720
  "TARGET_SSE || TARGET_3DNOW_A"
7738
  "TARGET_SSE || TARGET_3DNOW_A"
7721
  "sfence"
7739
  "sfence"
7722
  [(set_attr "type" "sse")
7740
  [(set_attr "type" "sse")
7741
   (set_attr "atom_sse_attr" "fence")
7723
   (set_attr "memory" "unknown")])
7742
   (set_attr "memory" "unknown")])
7724
7743
7725
(define_insn "sse2_clflush"
7744
(define_insn "sse2_clflush"
Lines 7728-7733 Link Here
7728
  "TARGET_SSE2"
7747
  "TARGET_SSE2"
7729
  "clflush\t%a0"
7748
  "clflush\t%a0"
7730
  [(set_attr "type" "sse")
7749
  [(set_attr "type" "sse")
7750
   (set_attr "atom_sse_attr" "fence")
7731
   (set_attr "memory" "unknown")])
7751
   (set_attr "memory" "unknown")])
7732
7752
7733
(define_expand "sse2_mfence"
7753
(define_expand "sse2_mfence"
Lines 7745-7750 Link Here
7745
  "TARGET_64BIT || TARGET_SSE2"
7765
  "TARGET_64BIT || TARGET_SSE2"
7746
  "mfence"
7766
  "mfence"
7747
  [(set_attr "type" "sse")
7767
  [(set_attr "type" "sse")
7768
   (set_attr "atom_sse_attr" "fence")
7748
   (set_attr "memory" "unknown")])
7769
   (set_attr "memory" "unknown")])
7749
7770
7750
(define_expand "sse2_lfence"
7771
(define_expand "sse2_lfence"
Lines 7762-7767 Link Here
7762
  "TARGET_SSE2"
7783
  "TARGET_SSE2"
7763
  "lfence"
7784
  "lfence"
7764
  [(set_attr "type" "sse")
7785
  [(set_attr "type" "sse")
7786
   (set_attr "atom_sse_attr" "lfence")
7765
   (set_attr "memory" "unknown")])
7787
   (set_attr "memory" "unknown")])
7766
7788
7767
(define_insn "sse3_mwait"
7789
(define_insn "sse3_mwait"
Lines 7885-7890 Link Here
7885
  "TARGET_SSSE3"
7907
  "TARGET_SSSE3"
7886
  "phaddw\t{%2, %0|%0, %2}"
7908
  "phaddw\t{%2, %0|%0, %2}"
7887
  [(set_attr "type" "sseiadd")
7909
  [(set_attr "type" "sseiadd")
7910
   (set_attr "atom_unit" "complex")
7888
   (set_attr "prefix_data16" "1")
7911
   (set_attr "prefix_data16" "1")
7889
   (set_attr "prefix_extra" "1")
7912
   (set_attr "prefix_extra" "1")
7890
   (set_attr "mode" "TI")])
7913
   (set_attr "mode" "TI")])
Lines 7913-7918 Link Here
7913
  "TARGET_SSSE3"
7936
  "TARGET_SSSE3"
7914
  "phaddw\t{%2, %0|%0, %2}"
7937
  "phaddw\t{%2, %0|%0, %2}"
7915
  [(set_attr "type" "sseiadd")
7938
  [(set_attr "type" "sseiadd")
7939
   (set_attr "atom_unit" "complex")
7916
   (set_attr "prefix_extra" "1")
7940
   (set_attr "prefix_extra" "1")
7917
   (set_attr "mode" "DI")])
7941
   (set_attr "mode" "DI")])
7918
7942
Lines 7967-7972 Link Here
7967
  "TARGET_SSSE3"
7991
  "TARGET_SSSE3"
7968
  "phaddd\t{%2, %0|%0, %2}"
7992
  "phaddd\t{%2, %0|%0, %2}"
7969
  [(set_attr "type" "sseiadd")
7993
  [(set_attr "type" "sseiadd")
7994
   (set_attr "atom_unit" "complex")
7970
   (set_attr "prefix_data16" "1")
7995
   (set_attr "prefix_data16" "1")
7971
   (set_attr "prefix_extra" "1")
7996
   (set_attr "prefix_extra" "1")
7972
   (set_attr "mode" "TI")])
7997
   (set_attr "mode" "TI")])
Lines 7987-7992 Link Here
7987
  "TARGET_SSSE3"
8012
  "TARGET_SSSE3"
7988
  "phaddd\t{%2, %0|%0, %2}"
8013
  "phaddd\t{%2, %0|%0, %2}"
7989
  [(set_attr "type" "sseiadd")
8014
  [(set_attr "type" "sseiadd")
8015
   (set_attr "atom_unit" "complex")
7990
   (set_attr "prefix_extra" "1")
8016
   (set_attr "prefix_extra" "1")
7991
   (set_attr "mode" "DI")])
8017
   (set_attr "mode" "DI")])
7992
8018
Lines 8073-8078 Link Here
8073
  "TARGET_SSSE3"
8099
  "TARGET_SSSE3"
8074
  "phaddsw\t{%2, %0|%0, %2}"
8100
  "phaddsw\t{%2, %0|%0, %2}"
8075
  [(set_attr "type" "sseiadd")
8101
  [(set_attr "type" "sseiadd")
8102
   (set_attr "atom_unit" "complex")
8076
   (set_attr "prefix_data16" "1")
8103
   (set_attr "prefix_data16" "1")
8077
   (set_attr "prefix_extra" "1")
8104
   (set_attr "prefix_extra" "1")
8078
   (set_attr "mode" "TI")])
8105
   (set_attr "mode" "TI")])
Lines 8101-8106 Link Here
8101
  "TARGET_SSSE3"
8128
  "TARGET_SSSE3"
8102
  "phaddsw\t{%2, %0|%0, %2}"
8129
  "phaddsw\t{%2, %0|%0, %2}"
8103
  [(set_attr "type" "sseiadd")
8130
  [(set_attr "type" "sseiadd")
8131
   (set_attr "atom_unit" "complex")
8104
   (set_attr "prefix_extra" "1")
8132
   (set_attr "prefix_extra" "1")
8105
   (set_attr "mode" "DI")])
8133
   (set_attr "mode" "DI")])
8106
8134
Lines 8187-8192 Link Here
8187
  "TARGET_SSSE3"
8215
  "TARGET_SSSE3"
8188
  "phsubw\t{%2, %0|%0, %2}"
8216
  "phsubw\t{%2, %0|%0, %2}"
8189
  [(set_attr "type" "sseiadd")
8217
  [(set_attr "type" "sseiadd")
8218
   (set_attr "atom_unit" "complex")
8190
   (set_attr "prefix_data16" "1")
8219
   (set_attr "prefix_data16" "1")
8191
   (set_attr "prefix_extra" "1")
8220
   (set_attr "prefix_extra" "1")
8192
   (set_attr "mode" "TI")])
8221
   (set_attr "mode" "TI")])
Lines 8215-8220 Link Here
8215
  "TARGET_SSSE3"
8244
  "TARGET_SSSE3"
8216
  "phsubw\t{%2, %0|%0, %2}"
8245
  "phsubw\t{%2, %0|%0, %2}"
8217
  [(set_attr "type" "sseiadd")
8246
  [(set_attr "type" "sseiadd")
8247
   (set_attr "atom_unit" "complex")
8218
   (set_attr "prefix_extra" "1")
8248
   (set_attr "prefix_extra" "1")
8219
   (set_attr "mode" "DI")])
8249
   (set_attr "mode" "DI")])
8220
8250
Lines 8269-8274 Link Here
8269
  "TARGET_SSSE3"
8299
  "TARGET_SSSE3"
8270
  "phsubd\t{%2, %0|%0, %2}"
8300
  "phsubd\t{%2, %0|%0, %2}"
8271
  [(set_attr "type" "sseiadd")
8301
  [(set_attr "type" "sseiadd")
8302
   (set_attr "atom_unit" "complex")
8272
   (set_attr "prefix_data16" "1")
8303
   (set_attr "prefix_data16" "1")
8273
   (set_attr "prefix_extra" "1")
8304
   (set_attr "prefix_extra" "1")
8274
   (set_attr "mode" "TI")])
8305
   (set_attr "mode" "TI")])
Lines 8289-8294 Link Here
8289
  "TARGET_SSSE3"
8320
  "TARGET_SSSE3"
8290
  "phsubd\t{%2, %0|%0, %2}"
8321
  "phsubd\t{%2, %0|%0, %2}"
8291
  [(set_attr "type" "sseiadd")
8322
  [(set_attr "type" "sseiadd")
8323
   (set_attr "atom_unit" "complex")
8292
   (set_attr "prefix_extra" "1")
8324
   (set_attr "prefix_extra" "1")
8293
   (set_attr "mode" "DI")])
8325
   (set_attr "mode" "DI")])
8294
8326
Lines 8375-8380 Link Here
8375
  "TARGET_SSSE3"
8407
  "TARGET_SSSE3"
8376
  "phsubsw\t{%2, %0|%0, %2}"
8408
  "phsubsw\t{%2, %0|%0, %2}"
8377
  [(set_attr "type" "sseiadd")
8409
  [(set_attr "type" "sseiadd")
8410
   (set_attr "atom_unit" "complex")
8378
   (set_attr "prefix_data16" "1")
8411
   (set_attr "prefix_data16" "1")
8379
   (set_attr "prefix_extra" "1")
8412
   (set_attr "prefix_extra" "1")
8380
   (set_attr "mode" "TI")])
8413
   (set_attr "mode" "TI")])
Lines 8403-8408 Link Here
8403
  "TARGET_SSSE3"
8436
  "TARGET_SSSE3"
8404
  "phsubsw\t{%2, %0|%0, %2}"
8437
  "phsubsw\t{%2, %0|%0, %2}"
8405
  [(set_attr "type" "sseiadd")
8438
  [(set_attr "type" "sseiadd")
8439
   (set_attr "atom_unit" "complex")
8406
   (set_attr "prefix_extra" "1")
8440
   (set_attr "prefix_extra" "1")
8407
   (set_attr "mode" "DI")])
8441
   (set_attr "mode" "DI")])
8408
8442
Lines 8509-8514 Link Here
8509
  "TARGET_SSSE3"
8543
  "TARGET_SSSE3"
8510
  "pmaddubsw\t{%2, %0|%0, %2}"
8544
  "pmaddubsw\t{%2, %0|%0, %2}"
8511
  [(set_attr "type" "sseiadd")
8545
  [(set_attr "type" "sseiadd")
8546
   (set_attr "atom_unit" "simul")
8512
   (set_attr "prefix_data16" "1")
8547
   (set_attr "prefix_data16" "1")
8513
   (set_attr "prefix_extra" "1")
8548
   (set_attr "prefix_extra" "1")
8514
   (set_attr "mode" "TI")])
8549
   (set_attr "mode" "TI")])
Lines 8547-8552 Link Here
8547
  "TARGET_SSSE3"
8582
  "TARGET_SSSE3"
8548
  "pmaddubsw\t{%2, %0|%0, %2}"
8583
  "pmaddubsw\t{%2, %0|%0, %2}"
8549
  [(set_attr "type" "sseiadd")
8584
  [(set_attr "type" "sseiadd")
8585
   (set_attr "atom_unit" "simul")
8550
   (set_attr "prefix_extra" "1")
8586
   (set_attr "prefix_extra" "1")
8551
   (set_attr "mode" "DI")])
8587
   (set_attr "mode" "DI")])
8552
8588
Lines 8754-8759 Link Here
8754
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
8790
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
8755
}
8791
}
8756
  [(set_attr "type" "sseishft")
8792
  [(set_attr "type" "sseishft")
8793
   (set_attr "atom_unit" "sishuf")
8757
   (set_attr "prefix_data16" "1")
8794
   (set_attr "prefix_data16" "1")
8758
   (set_attr "prefix_extra" "1")
8795
   (set_attr "prefix_extra" "1")
8759
   (set_attr "mode" "TI")])
8796
   (set_attr "mode" "TI")])
Lines 8770-8775 Link Here
8770
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
8807
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
8771
}
8808
}
8772
  [(set_attr "type" "sseishft")
8809
  [(set_attr "type" "sseishft")
8810
   (set_attr "atom_unit" "sishuf")
8773
   (set_attr "prefix_extra" "1")
8811
   (set_attr "prefix_extra" "1")
8774
   (set_attr "mode" "DI")])
8812
   (set_attr "mode" "DI")])
8775
8813
Lines 8956-8962 Link Here
8956
		     UNSPEC_MOVNTDQA))]
8994
		     UNSPEC_MOVNTDQA))]
8957
  "TARGET_SSE4_1"
8995
  "TARGET_SSE4_1"
8958
  "%vmovntdqa\t{%1, %0|%0, %1}"
8996
  "%vmovntdqa\t{%1, %0|%0, %1}"
8959
  [(set_attr "type" "ssecvt")
8997
  [(set_attr "type" "ssemov")
8960
   (set_attr "prefix_extra" "1")
8998
   (set_attr "prefix_extra" "1")
8961
   (set_attr "prefix" "maybe_vex")
8999
   (set_attr "prefix" "maybe_vex")
8962
   (set_attr "mode" "TI")])
9000
   (set_attr "mode" "TI")])
(-)gcc/config/i386/i386-c.c (+7 lines)
Lines 119-124 Link Here
119
      def_or_undef (parse_in, "__core2");
119
      def_or_undef (parse_in, "__core2");
120
      def_or_undef (parse_in, "__core2__");
120
      def_or_undef (parse_in, "__core2__");
121
      break;
121
      break;
122
    case PROCESSOR_ATOM:
123
      def_or_undef (parse_in, "__atom");
124
      def_or_undef (parse_in, "__atom__");
125
      break;
122
    /* use PROCESSOR_max to not set/unset the arch macro.  */
126
    /* use PROCESSOR_max to not set/unset the arch macro.  */
123
    case PROCESSOR_max:
127
    case PROCESSOR_max:
124
      break;
128
      break;
Lines 187-192 Link Here
187
    case PROCESSOR_CORE2:
191
    case PROCESSOR_CORE2:
188
      def_or_undef (parse_in, "__tune_core2__");
192
      def_or_undef (parse_in, "__tune_core2__");
189
      break;
193
      break;
194
    case PROCESSOR_ATOM:
195
      def_or_undef (parse_in, "__tune_atom__");
196
      break;
190
    case PROCESSOR_GENERIC32:
197
    case PROCESSOR_GENERIC32:
191
    case PROCESSOR_GENERIC64:
198
    case PROCESSOR_GENERIC64:
192
      break;
199
      break;
(-)gcc/config/i386/i386-protos.h (+3 lines)
Lines 85-90 Link Here
85
extern void ix86_expand_binary_operator (enum rtx_code,
85
extern void ix86_expand_binary_operator (enum rtx_code,
86
					 enum machine_mode, rtx[]);
86
					 enum machine_mode, rtx[]);
87
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
87
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
88
extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]);
89
extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
90
extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
88
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
91
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
89
					rtx[]);
92
					rtx[]);
90
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
93
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
(-)gcc/config/i386/i386.c (-66 / +407 lines)
Lines 1036-1041 Link Here
1036
  1,                                    /* cond_not_taken_branch_cost.  */
1036
  1,                                    /* cond_not_taken_branch_cost.  */
1037
};
1037
};
1038
1038
1039
static const
1040
struct processor_costs atom_cost = {
1041
  COSTS_N_INSNS (1),			/* cost of an add instruction */
1042
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1043
  COSTS_N_INSNS (1),			/* variable shift costs */
1044
  COSTS_N_INSNS (1),			/* constant shift costs */
1045
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1046
   COSTS_N_INSNS (4),			/*                               HI */
1047
   COSTS_N_INSNS (3),			/*                               SI */
1048
   COSTS_N_INSNS (4),			/*                               DI */
1049
   COSTS_N_INSNS (2)},			/*                               other */
1050
  0,					/* cost of multiply per each bit set */
1051
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1052
   COSTS_N_INSNS (26),			/*                          HI */
1053
   COSTS_N_INSNS (42),			/*                          SI */
1054
   COSTS_N_INSNS (74),			/*                          DI */
1055
   COSTS_N_INSNS (74)},			/*                          other */
1056
  COSTS_N_INSNS (1),			/* cost of movsx */
1057
  COSTS_N_INSNS (1),			/* cost of movzx */
1058
  8,					/* "large" insn */
1059
  17,					/* MOVE_RATIO */
1060
  2,					/* cost for loading QImode using movzbl */
1061
  {4, 4, 4},				/* cost of loading integer registers
1062
					   in QImode, HImode and SImode.
1063
					   Relative to reg-reg move (2).  */
1064
  {4, 4, 4},				/* cost of storing integer registers */
1065
  4,					/* cost of reg,reg fld/fst */
1066
  {12, 12, 12},				/* cost of loading fp registers
1067
					   in SFmode, DFmode and XFmode */
1068
  {6, 6, 8},				/* cost of storing fp registers
1069
					   in SFmode, DFmode and XFmode */
1070
  2,					/* cost of moving MMX register */
1071
  {8, 8},				/* cost of loading MMX registers
1072
					   in SImode and DImode */
1073
  {8, 8},				/* cost of storing MMX registers
1074
					   in SImode and DImode */
1075
  2,					/* cost of moving SSE register */
1076
  {8, 8, 8},				/* cost of loading SSE registers
1077
					   in SImode, DImode and TImode */
1078
  {8, 8, 8},				/* cost of storing SSE registers
1079
					   in SImode, DImode and TImode */
1080
  5,					/* MMX or SSE register to integer */
1081
  32,					/* size of l1 cache.  */
1082
  256,					/* size of l2 cache.  */
1083
  64,					/* size of prefetch block */
1084
  6,					/* number of parallel prefetches */
1085
  3,					/* Branch cost */
1086
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1087
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1088
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1089
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1090
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1091
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1092
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1093
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1094
          {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1095
  {{libcall, {{8, loop}, {15, unrolled_loop},
1096
          {2048, rep_prefix_4_byte}, {-1, libcall}}},
1097
   {libcall, {{24, loop}, {32, unrolled_loop},
1098
          {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099
  1,                                    /* scalar_stmt_cost.  */
1100
  1,                                    /* scalar load_cost.  */
1101
  1,                                    /* scalar_store_cost.  */
1102
  1,                                    /* vec_stmt_cost.  */
1103
  1,                                    /* vec_to_scalar_cost.  */
1104
  1,                                    /* scalar_to_vec_cost.  */
1105
  1,                                    /* vec_align_load_cost.  */
1106
  2,                                    /* vec_unalign_load_cost.  */
1107
  1,                                    /* vec_store_cost.  */
1108
  3,                                    /* cond_taken_branch_cost.  */
1109
  1,                                    /* cond_not_taken_branch_cost.  */
1110
};
1111
1039
/* Generic64 should produce code tuned for Nocona and K8.  */
1112
/* Generic64 should produce code tuned for Nocona and K8.  */
1040
static const
1113
static const
1041
struct processor_costs generic64_cost = {
1114
struct processor_costs generic64_cost = {
Lines 1194-1199 Link Here
1194
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
1267
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
1195
#define m_NOCONA  (1<<PROCESSOR_NOCONA)
1268
#define m_NOCONA  (1<<PROCESSOR_NOCONA)
1196
#define m_CORE2  (1<<PROCESSOR_CORE2)
1269
#define m_CORE2  (1<<PROCESSOR_CORE2)
1270
#define m_ATOM  (1<<PROCESSOR_ATOM)
1197
1271
1198
#define m_GEODE  (1<<PROCESSOR_GEODE)
1272
#define m_GEODE  (1<<PROCESSOR_GEODE)
1199
#define m_K6  (1<<PROCESSOR_K6)
1273
#define m_K6  (1<<PROCESSOR_K6)
Lines 1231-1240 Link Here
1231
  m_486 | m_PENT,
1305
  m_486 | m_PENT,
1232
1306
1233
  /* X86_TUNE_UNROLL_STRLEN */
1307
  /* X86_TUNE_UNROLL_STRLEN */
1234
  m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1308
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309
  | m_CORE2 | m_GENERIC,
1235
1310
1236
  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311
  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237
  m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1312
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1238
1313
1239
  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314
  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1240
     on simulation result. But after P4 was made, no performance benefit
1315
     on simulation result. But after P4 was made, no performance benefit
Lines 1246-1257 Link Here
1246
  ~m_386,
1321
  ~m_386,
1247
1322
1248
  /* X86_TUNE_USE_SAHF */
1323
  /* X86_TUNE_USE_SAHF */
1249
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250
  | m_NOCONA | m_CORE2 | m_GENERIC,
1325
  | m_NOCONA | m_CORE2 | m_GENERIC,
1251
1326
1252
  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327
  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253
     partial dependencies.  */
1328
     partial dependencies.  */
1254
  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1329
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1255
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1330
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1256
1331
1257
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
Lines 1271-1283 Link Here
1271
  m_386 | m_486 | m_K6_GEODE,
1346
  m_386 | m_486 | m_K6_GEODE,
1272
1347
1273
  /* X86_TUNE_USE_SIMODE_FIOP */
1348
  /* X86_TUNE_USE_SIMODE_FIOP */
1274
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1349
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1275
1350
1276
  /* X86_TUNE_USE_MOV0 */
1351
  /* X86_TUNE_USE_MOV0 */
1277
  m_K6,
1352
  m_K6,
1278
1353
1279
  /* X86_TUNE_USE_CLTD */
1354
  /* X86_TUNE_USE_CLTD */
1280
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1355
  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1281
1356
1282
  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
1357
  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
1283
  m_PENT4,
1358
  m_PENT4,
Lines 1292-1299 Link Here
1292
  ~(m_PENT | m_PPRO),
1367
  ~(m_PENT | m_PPRO),
1293
1368
1294
  /* X86_TUNE_PROMOTE_QIMODE */
1369
  /* X86_TUNE_PROMOTE_QIMODE */
1295
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1370
  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1296
  | m_GENERIC /* | m_PENT4 ? */,
1371
  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1297
1372
1298
  /* X86_TUNE_FAST_PREFIX */
1373
  /* X86_TUNE_FAST_PREFIX */
1299
  ~(m_PENT | m_486 | m_386),
1374
  ~(m_PENT | m_486 | m_386),
Lines 1317-1342 Link Here
1317
  m_PPRO,
1392
  m_PPRO,
1318
1393
1319
  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
1394
  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
1320
  m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1395
  m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396
  | m_CORE2 | m_GENERIC,
1321
1397
1322
  /* X86_TUNE_ADD_ESP_8 */
1398
  /* X86_TUNE_ADD_ESP_8 */
1323
  m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1399
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1324
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1400
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325
1401
1326
  /* X86_TUNE_SUB_ESP_4 */
1402
  /* X86_TUNE_SUB_ESP_4 */
1327
  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1403
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1404
  | m_GENERIC,
1328
1405
1329
  /* X86_TUNE_SUB_ESP_8 */
1406
  /* X86_TUNE_SUB_ESP_8 */
1330
  m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1407
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1331
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1408
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1332
1409
1333
  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410
  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334
     for DFmode copies */
1411
     for DFmode copies */
1335
  ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412
  ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336
    | m_GENERIC | m_GEODE),
1413
    | m_GENERIC | m_GEODE),
1337
1414
1338
  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415
  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339
  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1416
  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1340
1417
1341
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1342
     conflict here in between PPro/Pentium4 based chips that thread 128bit
1419
     conflict here in between PPro/Pentium4 based chips that thread 128bit
Lines 1347-1353 Link Here
1347
     shows that disabling this option on P4 brings over 20% SPECfp regression,
1424
     shows that disabling this option on P4 brings over 20% SPECfp regression,
1348
     while enabling it on K8 brings roughly 2.4% regression that can be partly
1425
     while enabling it on K8 brings roughly 2.4% regression that can be partly
1349
     masked by careful scheduling of moves.  */
1426
     masked by careful scheduling of moves.  */
1350
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1427
  m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1428
  | m_AMDFAM10,
1351
1429
1352
  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1430
  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1353
  m_AMDFAM10,
1431
  m_AMDFAM10,
Lines 1365-1377 Link Here
1365
  m_PPRO | m_PENT4 | m_NOCONA,
1443
  m_PPRO | m_PENT4 | m_NOCONA,
1366
1444
1367
  /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445
  /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368
  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1446
  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1369
1447
1370
  /* X86_TUNE_PROLOGUE_USING_MOVE */
1448
  /* X86_TUNE_PROLOGUE_USING_MOVE */
1371
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1449
  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1372
1450
1373
  /* X86_TUNE_EPILOGUE_USING_MOVE */
1451
  /* X86_TUNE_EPILOGUE_USING_MOVE */
1374
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1452
  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1375
1453
1376
  /* X86_TUNE_SHIFT1 */
1454
  /* X86_TUNE_SHIFT1 */
1377
  ~m_486,
1455
  ~m_486,
Lines 1380-1408 Link Here
1380
  m_AMD_MULTIPLE,
1458
  m_AMD_MULTIPLE,
1381
1459
1382
  /* X86_TUNE_INTER_UNIT_MOVES */
1460
  /* X86_TUNE_INTER_UNIT_MOVES */
1383
  ~(m_AMD_MULTIPLE | m_GENERIC),
1461
  ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1384
1462
1385
  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1463
  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1386
  ~(m_AMDFAM10),
1464
  ~(m_AMDFAM10),
1387
1465
1388
  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466
  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389
     than 4 branch instructions in the 16 byte window.  */
1467
     than 4 branch instructions in the 16 byte window.  */
1390
  m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1468
  m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1469
  | m_GENERIC,
1391
1470
1392
  /* X86_TUNE_SCHEDULE */
1471
  /* X86_TUNE_SCHEDULE */
1393
  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1472
  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1473
  | m_GENERIC,
1394
1474
1395
  /* X86_TUNE_USE_BT */
1475
  /* X86_TUNE_USE_BT */
1396
  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1476
  m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1397
1477
1398
  /* X86_TUNE_USE_INCDEC */
1478
  /* X86_TUNE_USE_INCDEC */
1399
  ~(m_PENT4 | m_NOCONA | m_GENERIC),
1479
  ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1400
1480
1401
  /* X86_TUNE_PAD_RETURNS */
1481
  /* X86_TUNE_PAD_RETURNS */
1402
  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1482
  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1403
1483
1404
  /* X86_TUNE_EXT_80387_CONSTANTS */
1484
  /* X86_TUNE_EXT_80387_CONSTANTS */
1405
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1485
  m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486
  | m_CORE2 | m_GENERIC,
1406
1487
1407
  /* X86_TUNE_SHORTEN_X87_SSE */
1488
  /* X86_TUNE_SHORTEN_X87_SSE */
1408
  ~m_K8,
1489
  ~m_K8,
Lines 1447-1452 Link Here
1447
     with a subsequent conditional jump instruction into a single
1528
     with a subsequent conditional jump instruction into a single
1448
     compare-and-branch uop.  */
1529
     compare-and-branch uop.  */
1449
  m_CORE2,
1530
  m_CORE2,
1531
1532
  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533
     will impact LEA instruction selection. */
1534
  m_ATOM,
1450
};
1535
};
1451
1536
1452
/* Feature tests against the various architecture variations.  */
1537
/* Feature tests against the various architecture variations.  */
Lines 1472-1481 Link Here
1472
};
1557
};
1473
1558
1474
static const unsigned int x86_accumulate_outgoing_args
1559
static const unsigned int x86_accumulate_outgoing_args
1475
  = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1560
  = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1561
    | m_GENERIC;
1476
1562
1477
static const unsigned int x86_arch_always_fancy_math_387
1563
static const unsigned int x86_arch_always_fancy_math_387
1478
  = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564
  = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479
    | m_NOCONA | m_CORE2 | m_GENERIC;
1565
    | m_NOCONA | m_CORE2 | m_GENERIC;
1480
1566
1481
static enum stringop_alg stringop_alg = no_stringop;
1567
static enum stringop_alg stringop_alg = no_stringop;
Lines 1952-1958 Link Here
1952
  {&core2_cost, 16, 10, 16, 10, 16},
2038
  {&core2_cost, 16, 10, 16, 10, 16},
1953
  {&generic32_cost, 16, 7, 16, 7, 16},
2039
  {&generic32_cost, 16, 7, 16, 7, 16},
1954
  {&generic64_cost, 16, 10, 16, 10, 16},
2040
  {&generic64_cost, 16, 10, 16, 10, 16},
1955
  {&amdfam10_cost, 32, 24, 32, 7, 32}
2041
  {&amdfam10_cost, 32, 24, 32, 7, 32},
2042
  {&atom_cost, 16, 7, 16, 7, 16}
1956
};
2043
};
1957
2044
1958
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2045
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
Lines 1970-1975 Link Here
1970
  "prescott",
2057
  "prescott",
1971
  "nocona",
2058
  "nocona",
1972
  "core2",
2059
  "core2",
2060
  "atom",
1973
  "geode",
2061
  "geode",
1974
  "k6",
2062
  "k6",
1975
  "k6-2",
2063
  "k6-2",
Lines 2528-2533 Link Here
2528
      {"core2", PROCESSOR_CORE2, CPU_CORE2,
2616
      {"core2", PROCESSOR_CORE2, CPU_CORE2,
2529
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2617
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2530
	| PTA_SSSE3 | PTA_CX16},
2618
	| PTA_SSSE3 | PTA_CX16},
2619
      {"atom", PROCESSOR_ATOM, CPU_ATOM,
2620
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2621
	| PTA_SSSE3 | PTA_CX16},
2531
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
2622
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
2532
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2623
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2533
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2624
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
Lines 12824-12829 Link Here
12824
    emit_move_insn (operands[0], dst);
12915
    emit_move_insn (operands[0], dst);
12825
}
12916
}
12826
12917
12918
#define LEA_SEARCH_THRESHOLD 12
12919
12920
/* Reach non-agu definition of op1 and op2 in insn's basic block. 
12921
   Search backward until 1. passed LEA_SEARCH_THRESHOLD instructions, 
12922
   or 2. reach BB boundary, or reach agu definition. 
12923
   Returns the distance between the non-agu definition point and insn.
12924
   If there is no definition point, returns -1.
12925
   TODO: Currently we have no way to distinguish if definition insn is a LEA. 
12926
   We just assume all definitions are non-lea. */
12927
static int
12928
distance_non_agu_define (rtx op1, rtx op2, rtx insn)
12929
{
12930
  rtx reg_op1 = REG_P (op1) ? op1 : NULL;
12931
  rtx reg_op2 = REG_P (op2) ? op2 : NULL;
12932
  basic_block bb = BLOCK_FOR_INSN (insn);
12933
  int distance = 0;
12934
12935
  if (insn != BB_HEAD (bb))
12936
    {
12937
12938
      rtx prev = PREV_INSN (insn);
12939
      while (prev && distance < LEA_SEARCH_THRESHOLD)
12940
	{
12941
	  if (INSN_P (prev))
12942
	    {
12943
	      distance++;
12944
	      if ((reg_op1 && reg_set_p (reg_op1, prev))
12945
		  || (reg_op2 && reg_set_p (reg_op2, prev)))
12946
		return distance ;
12947
	    }
12948
	  if (prev == BB_HEAD (bb))
12949
	    break;
12950
	  prev = PREV_INSN (prev);
12951
	}
12952
    }
12953
  
12954
  if (distance < LEA_SEARCH_THRESHOLD)
12955
    {
12956
      edge e;
12957
      edge_iterator ei;
12958
      bool simple_loop = false;
12959
  
12960
      FOR_EACH_EDGE (e, ei, bb->preds)
12961
	if (e->src == bb)
12962
	  {
12963
	    simple_loop = true;
12964
	    break;
12965
	  }
12966
  
12967
      if (simple_loop)
12968
	{
12969
	  rtx prev = BB_END (bb);
12970
	  while (prev
12971
		 && prev != insn
12972
		 && distance < LEA_SEARCH_THRESHOLD)
12973
	    {
12974
	      if (INSN_P (prev))
12975
		{
12976
		  distance++;
12977
		  if ((reg_op1 && reg_set_p (reg_op1, prev))
12978
		      || (reg_op2 && reg_set_p (reg_op2, prev)))
12979
		    return distance;
12980
		}
12981
	      prev = PREV_INSN (prev);
12982
	    }
12983
	}
12984
    }
12985
12986
  return -1;
12987
}
12988
12989
/* Return the distance between this insn and the next insn that uses 
12990
   result of this insn as memory address. 
12991
   Return -1 if no such use is found within LEA_SEARCH_THRESHOLD. */
12992
static int
12993
distance_agu_use (rtx op0, rtx insn)
12994
{
12995
  basic_block bb = BLOCK_FOR_INSN (insn);
12996
  int distance = 0;
12997
12998
  if (insn != BB_END(bb))
12999
    {
13000
      rtx next = NEXT_INSN (insn);
13001
13002
      while (next && distance < LEA_SEARCH_THRESHOLD)
13003
	{
13004
	  if (INSN_P (next))
13005
	    {
13006
	      distance++;
13007
	      if (reg_mentioned_by_mem_p (op0, next))
13008
		return distance;
13009
	      if (reg_set_p (op0, next))
13010
		return -1;
13011
	    }
13012
	  if (next == BB_END (bb))
13013
	    break;
13014
	  next = NEXT_INSN (next);
13015
	}
13016
    }
13017
13018
  if (distance < LEA_SEARCH_THRESHOLD)
13019
    {
13020
      edge e;
13021
      edge_iterator ei;
13022
      bool simple_loop = false;
13023
  
13024
      FOR_EACH_EDGE (e, ei, bb->succs)
13025
        if (e->dest == bb)
13026
	  {
13027
	    simple_loop = true;
13028
	    break;
13029
	  }
13030
  
13031
      if (simple_loop)
13032
	{
13033
	  rtx next = BB_HEAD (bb);
13034
	  while (next && distance < LEA_SEARCH_THRESHOLD)
13035
	    {
13036
	      if (next == insn)
13037
		break;
13038
	      if (INSN_P (next))
13039
		{
13040
		  distance++;
13041
		  if (reg_mentioned_by_mem_p (op0, next))
13042
		    return distance;
13043
		  if (reg_set_p (op0, next))
13044
		    return -1;
13045
		}
13046
	      next = NEXT_INSN (next);
13047
	    }
13048
	}
13049
    }  
13050
13051
  return -1;
13052
}
13053
13054
/* Define this macro to tune LEA priority vs ADD, it takes effect when
13055
   there is a dilemma of choosing LEA or ADD
13056
   Negative value: ADD is more preferred than LEA
13057
   Zero: Neutral
13058
   Positive value: LEA is more preferred than ADD*/
13059
#define IX86_LEA_PRIORITY 2
13060
13061
/* Return true if it is ok to optimize an ADD operation to LEA
13062
   operation to avoid flag register consumption.  For the processors
13063
   like ATOM, if the destination register of LEA holds an actual
13064
   address which will be used soon, LEA is better and otherwise ADD
13065
   is better.  */
13066
13067
bool
13068
ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13069
                     rtx insn,
13070
                     rtx operands[])
13071
{
13072
  gcc_assert (REG_P (operands[0]));
13073
  gcc_assert (operands[1] && operands[2]);
13074
13075
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13076
    {
13077
      if (true_regnum (operands[0]) != true_regnum (operands[1]))
13078
        return true;
13079
      else
13080
        return false;
13081
    }
13082
13083
  /* If a = b + c, (a!=b && a!=c), must use lea form. */
13084
  if (true_regnum (operands[0]) != true_regnum (operands[1])
13085
      && true_regnum (operands[0]) != true_regnum (operands[2]))
13086
    return true;
13087
  else    
13088
    {
13089
      int dist_define, dist_use;
13090
      dist_define = distance_non_agu_define (operands[1],
13091
					     operands[2], insn);
13092
      if (dist_define <= 0)
13093
        return true;
13094
13095
      /* If this insn has both backward non-agu dependence and forward
13096
         agu dependence, the one with the shorter distance takes effect. */
13097
      dist_use = distance_agu_use (operands[0], insn);
13098
      if (dist_use <= 0
13099
	  || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13100
        return false;
13101
13102
      return true;
13103
    }
13104
}
13105
13106
/* Return true if destination reg of SET_INSN is shift count of
13107
   USE_INSN.  */
13108
13109
bool
13110
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13111
{
13112
  rtx set_pattern = PATTERN (set_insn);
13113
  rtx set_dest;
13114
  rtx shift_rtx;
13115
  rtx use_pattern;
13116
13117
  /* Retrieve destination of set_insn */
13118
  switch (GET_CODE (set_pattern))
13119
    {
13120
    case SET:
13121
      set_dest = SET_DEST (set_pattern);
13122
      break;
13123
    case PARALLEL:
13124
      set_pattern = XVECEXP (set_pattern, 0, 0);
13125
      if (GET_CODE (set_pattern ) == SET)
13126
	{
13127
	  set_dest = SET_DEST (set_pattern);
13128
	  break;
13129
	}
13130
    default:
13131
      set_dest = NULL;
13132
      break;
13133
    }
13134
  if (!set_dest || !REG_P (set_dest))
13135
    return false;
13136
13137
  /* Retrieve shift count of use_insn */
13138
  use_pattern = PATTERN (use_insn);
13139
  switch (GET_CODE (use_pattern))
13140
    {
13141
    case SET:
13142
      shift_rtx = XEXP (use_pattern, 1);
13143
      break;
13144
    case PARALLEL:
13145
      set_pattern = XVECEXP (use_pattern, 0, 0);
13146
      if (GET_CODE (set_pattern) == SET)
13147
	{
13148
	  shift_rtx = XEXP (set_pattern, 1);
13149
	  break;
13150
	}
13151
    default:
13152
      shift_rtx = NULL;
13153
      break;
13154
    }
13155
13156
  if (shift_rtx 
13157
      && (GET_CODE (shift_rtx) == ASHIFT
13158
	  || GET_CODE (shift_rtx) == LSHIFTRT
13159
	  || GET_CODE (shift_rtx) == ASHIFTRT
13160
	  || GET_CODE (shift_rtx) == ROTATE
13161
	  || GET_CODE (shift_rtx) == ROTATERT))
13162
    {
13163
      rtx shift_count = XEXP (shift_rtx, 1);
13164
      gcc_assert (shift_count);
13165
13166
      /* Return true if shift count is dest of set_insn */
13167
      if (REG_P (shift_count)
13168
	  && true_regnum (set_dest) == true_regnum (shift_count))
13169
	return true;
13170
    }
13171
13172
  return false;
13173
}
13174
12827
/* Return TRUE or FALSE depending on whether the unary operator meets the
13175
/* Return TRUE or FALSE depending on whether the unary operator meets the
12828
   appropriate constraints.  */
13176
   appropriate constraints.  */
12829
13177
Lines 18943-18948 Link Here
18943
  switch (ix86_tune)
19291
  switch (ix86_tune)
18944
    {
19292
    {
18945
    case PROCESSOR_PENTIUM:
19293
    case PROCESSOR_PENTIUM:
19294
    case PROCESSOR_ATOM:
18946
    case PROCESSOR_K6:
19295
    case PROCESSOR_K6:
18947
      return 2;
19296
      return 2;
18948
19297
Lines 19009-19049 Link Here
19009
  return 1;
19358
  return 1;
19010
}
19359
}
19011
19360
19012
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
19361
/* Return true iff USE_INSN has a memory address with operands set by
19013
   address with operands set by DEP_INSN.  */
19362
   SET_INSN.  */
19014
19363
19015
static int
19364
bool
19016
ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19365
ix86_agi_dependent (rtx set_insn, rtx use_insn)
19017
{
19366
{
19018
  rtx addr;
19367
  int i;
19019
19368
  extract_insn_cached (use_insn);
19020
  if (insn_type == TYPE_LEA
19369
  for (i = recog_data.n_operands - 1; i >= 0; --i)
19021
      && TARGET_PENTIUM)
19370
    if (MEM_P (recog_data.operand[i]))
19022
    {
19371
      {
19023
      addr = PATTERN (insn);
19372
	rtx addr = XEXP (recog_data.operand[i], 0);
19024
19373
	return modified_in_p (addr, set_insn) != 0;
19025
      if (GET_CODE (addr) == PARALLEL)
19374
      }
19026
	addr = XVECEXP (addr, 0, 0);
19375
  return false;
19027
19028
      gcc_assert (GET_CODE (addr) == SET);
19029
19030
      addr = SET_SRC (addr);
19031
    }
19032
  else
19033
    {
19034
      int i;
19035
      extract_insn_cached (insn);
19036
      for (i = recog_data.n_operands - 1; i >= 0; --i)
19037
	if (MEM_P (recog_data.operand[i]))
19038
	  {
19039
	    addr = XEXP (recog_data.operand[i], 0);
19040
	    goto found;
19041
	  }
19042
      return 0;
19043
    found:;
19044
    }
19045
19046
  return modified_in_p (addr, dep_insn);
19047
}
19376
}
19048
19377
19049
static int
19378
static int
Lines 19071-19079 Link Here
19071
    {
19400
    {
19072
    case PROCESSOR_PENTIUM:
19401
    case PROCESSOR_PENTIUM:
19073
      /* Address Generation Interlock adds a cycle of latency.  */
19402
      /* Address Generation Interlock adds a cycle of latency.  */
19074
      if (ix86_agi_dependent (insn, dep_insn, insn_type))
19403
      if (insn_type == TYPE_LEA)
19075
	cost += 1;
19404
	{
19405
	  rtx addr = PATTERN (insn);
19076
19406
19407
	  if (GET_CODE (addr) == PARALLEL)
19408
	    addr = XVECEXP (addr, 0, 0);
19409
19410
	  gcc_assert (GET_CODE (addr) == SET);
19411
19412
	  addr = SET_SRC (addr);
19413
	  if (modified_in_p (addr, dep_insn))
19414
	    cost += 1;
19415
	}
19416
19077
      /* ??? Compares pair with jump/setcc.  */
19417
      /* ??? Compares pair with jump/setcc.  */
19078
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
19418
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
19079
	cost = 0;
19419
	cost = 0;
Lines 19081-19087 Link Here
19081
      /* Floating point stores require value to be ready one cycle earlier.  */
19421
      /* Floating point stores require value to be ready one cycle earlier.  */
19082
      if (insn_type == TYPE_FMOV
19422
      if (insn_type == TYPE_FMOV
19083
	  && get_attr_memory (insn) == MEMORY_STORE
19423
	  && get_attr_memory (insn) == MEMORY_STORE
19084
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
19424
	  && !ix86_agi_dependent (dep_insn, insn))
19085
	cost += 1;
19425
	cost += 1;
19086
      break;
19426
      break;
19087
19427
Lines 19104-19110 Link Here
19104
	 in parallel with previous instruction in case
19444
	 in parallel with previous instruction in case
19105
	 previous instruction is not needed to compute the address.  */
19445
	 previous instruction is not needed to compute the address.  */
19106
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19446
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19107
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
19447
	  && !ix86_agi_dependent (dep_insn, insn))
19108
	{
19448
	{
19109
	  /* Claim moves to take one cycle, as core can issue one load
19449
	  /* Claim moves to take one cycle, as core can issue one load
19110
	     at time and the next load can start cycle later.  */
19450
	     at time and the next load can start cycle later.  */
Lines 19133-19139 Link Here
19133
	 in parallel with previous instruction in case
19473
	 in parallel with previous instruction in case
19134
	 previous instruction is not needed to compute the address.  */
19474
	 previous instruction is not needed to compute the address.  */
19135
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19475
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19136
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
19476
	  && !ix86_agi_dependent (dep_insn, insn))
19137
	{
19477
	{
19138
	  /* Claim moves to take one cycle, as core can issue one load
19478
	  /* Claim moves to take one cycle, as core can issue one load
19139
	     at time and the next load can start cycle later.  */
19479
	     at time and the next load can start cycle later.  */
Lines 19150-19155 Link Here
19150
    case PROCESSOR_ATHLON:
19490
    case PROCESSOR_ATHLON:
19151
    case PROCESSOR_K8:
19491
    case PROCESSOR_K8:
19152
    case PROCESSOR_AMDFAM10:
19492
    case PROCESSOR_AMDFAM10:
19493
    case PROCESSOR_ATOM:
19153
    case PROCESSOR_GENERIC32:
19494
    case PROCESSOR_GENERIC32:
19154
    case PROCESSOR_GENERIC64:
19495
    case PROCESSOR_GENERIC64:
19155
      memory = get_attr_memory (insn);
19496
      memory = get_attr_memory (insn);
Lines 19158-19164 Link Here
19158
	 in parallel with previous instruction in case
19499
	 in parallel with previous instruction in case
19159
	 previous instruction is not needed to compute the address.  */
19500
	 previous instruction is not needed to compute the address.  */
19160
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19501
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19161
	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
19502
	  && !ix86_agi_dependent (dep_insn, insn))
19162
	{
19503
	{
19163
	  enum attr_unit unit = get_attr_unit (insn);
19504
	  enum attr_unit unit = get_attr_unit (insn);
19164
	  int loadcost = 3;
19505
	  int loadcost = 3;

Return to bug 262603