diff --git a/gcc/config.gcc b/gcc/config.gcc
index f243cb3..74e79fc 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1074,7 +1074,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i
tmake_file="${tmake_file} i386/t-linux64"
need_64bit_hwint=yes
case X"${with_cpu}" in
- Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
+ Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
;;
X)
if test x$with_cpu_64 = x; then
@@ -1083,7 +1083,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i
;;
*)
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
- echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
+ echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
exit 1
;;
esac
@@ -1189,7 +1189,7 @@ i[34567]86-*-solaris2*)
need_64bit_hwint=yes
use_gcc_stdint=wrap
case X"${with_cpu}" in
- Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
+ Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
;;
X)
if test x$with_cpu_64 = x; then
@@ -1198,7 +1198,7 @@ i[34567]86-*-solaris2*)
;;
*)
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
- echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
+ echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
exit 1
;;
esac
@@ -2801,7 +2801,7 @@ case "${target}" in
esac
# OK
;;
- "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
+ "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | atom | generic)
# OK
;;
*)
diff --git a/gcc/config/i386/atom.md b/gcc/config/i386/atom.md
new file mode 100644
index 0000000..9d5cbf2
--- /dev/null
+++ b/gcc/config/i386/atom.md
@@ -0,0 +1,770 @@
+;; Atom Scheduling
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; .
+;;
+;; Atom is an in-order core with two integer pipelines.
+
+
+(define_attr "atom_unit" "sishuf,simul,jeu,complex,other"
+ (const_string "other"))
+
+(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
+ (const_string "other"))
+
+(define_automaton "atom")
+
+;; Atom has two ports: port 0 and port 1 connecting to all execution units
+(define_cpu_unit "atom-port-0,atom-port-1" "atom")
+
+;; EU: Execution Unit
+;; Atom EUs are connected by port 0 or port 1.
+
+(define_cpu_unit "atom-eu-0, atom-eu-1,
+ atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
+ "atom")
+
+;; Some EUs have duplicated copied and can be accessed via either
+;; port 0 or port 1
+;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
+
+;;; Some instructions is dual-pipe execution, need both ports
+;;; Complex multi-op macro-instructoins need both ports and all EUs
+(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
+(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 +
+ atom-imul-1 + atom-imul-2 + atom-imul-3 +
+ atom-imul-4)")
+
+;;; Most of simple instructions have 1 cycle latency. Some of them
+;;; issue in port 0, some in port 0 and some in either port.
+(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
+(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
+(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
+
+;;; Some insn issues in port 0 with 3 cycle latency and 1 cycle tput
+(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
+
+;;; fmul insn can have 4 or 5 cycles latency
+(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
+(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
+
+;;; fadd can has 5 cycles latency depends on instruction forms
+(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
+
+;;; imul insn has 5 cycles latency
+(define_reservation "atom-imul-32"
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4,
+ atom-port-0")
+;;; imul instruction excludes other non-FP instructions.
+(exclusion_set "atom-eu-0, atom-eu-1"
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
+
+;;; dual-execution instructions can have 1,2,4,5 cycles latency depends on
+;;; instruction forms
+(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
+(define_reservation "atom-dual-2c"
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
+(define_reservation "atom-dual-5c"
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
+
+;;; Complex macro-instruction has variants of latency, and uses both ports.
+(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)")
+
+(define_insn_reservation "atom_other" 9
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "!jeu")))
+ "atom-complex, atom-all-eu*8")
+
+;; return has type "other" with atom_unit "jeu"
+(define_insn_reservation "atom_other_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "jeu")))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_multi" 9
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "multi"))
+ "atom-complex, atom-all-eu*8")
+
+;; Normal alu insns without carry
+(define_insn_reservation "atom_alu" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "0"))))
+ "atom-simple-either")
+
+;; Normal alu insns without carry
+(define_insn_reservation "atom_alu_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "0"))))
+ "atom-simple-either")
+
+;; Alu insn consuming CF, such as add/sbb
+(define_insn_reservation "atom_alu_carry" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "1"))))
+ "atom-simple-either")
+
+;; Alu insn consuming CF, such as add/sbb
+(define_insn_reservation "atom_alu_carry_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "1"))))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_alu1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_alu1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_negnot" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_negnot_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_imov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_imov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; 16<-16, 32<-32
+(define_insn_reservation "atom_imovx" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "atom-simple-either")
+
+;; 16<-16, 32<-32, mem
+(define_insn_reservation "atom_imovx_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "atom-simple-either")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
+(define_insn_reservation "atom_imovx_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "atom-simple-0")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
+(define_insn_reservation "atom_imovx_2_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "atom-simple-0")
+
+;; 16<-8
+(define_insn_reservation "atom_imovx_3" 3
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (match_operand:HI 0 "register_operand")
+ (match_operand:QI 1 "general_operand"))))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_lea" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "!HI")))
+ "atom-simple-either")
+
+;; lea 16bit address is complex insn
+(define_insn_reservation "atom_lea_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "HI")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_incdec" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_incdec_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; simple shift instruction use SHIFT eu, none memory
+(define_insn_reservation "atom_ishift" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
+ "atom-simple-0")
+
+;; simple shift instruction use SHIFT eu, memory
+(define_insn_reservation "atom_ishift_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
+ "atom-simple-0")
+
+;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles
+(define_insn_reservation "atom_ishift_3" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (eq_attr "prefix_0f" "1")))
+ "atom-complex, atom-all-eu*6")
+
+(define_insn_reservation "atom_ishift1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_ishift1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_imul" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
+ "atom-imul-32")
+
+(define_insn_reservation "atom_imul_mem" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
+ "atom-imul-32")
+
+;; latency set to 10 as common 64x64 imul
+(define_insn_reservation "atom_imul_3" 10
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (eq_attr "mode" "!SI")))
+ "atom-complex, atom-all-eu*9")
+
+(define_insn_reservation "atom_idiv" 65
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "idiv"))
+ "atom-complex, atom-all-eu*32, nothing*32")
+
+(define_insn_reservation "atom_icmp" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_icmp_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_test" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_test_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_ibr" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "!load")))
+ "atom-simple-1")
+
+;; complex if jump target is from address
+(define_insn_reservation "atom_ibr_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "load")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_setcc" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "!store")))
+ "atom-simple-either")
+
+;; 2 cycles complex if target is in memory
+(define_insn_reservation "atom_setcc_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "store")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_icmov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_icmov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "atom_push" 2
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "push"))
+ "atom-dual-2c")
+
+;; pop r64 is 1 cycle. UCODE if segreg, ignored
+(define_insn_reservation "atom_pop" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "DI")))
+ "atom-dual-1c")
+
+;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
+(define_insn_reservation "atom_pop_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "!DI")))
+ "atom-dual-2c")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "atom_call" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "call"))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_callv" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "callv"))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_leave" 3
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "leave"))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_str" 3
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "str"))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_sselog" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_sselog_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_sselog1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_sselog1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+;; not pmad, not psad
+(define_insn_reservation "atom_sseiadd" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "!simul")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-simple-either")
+
+;; pmad, psad and 64
+(define_insn_reservation "atom_sseiadd_2" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "DI")))))
+ "atom-fmul-4c")
+
+;; pmad, psad and 128
+(define_insn_reservation "atom_sseiadd_3" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "TI")))))
+ "atom-fmul-5c")
+
+;; if paddq(64 bit op), phadd/phsub
+(define_insn_reservation "atom_sseiadd_4" 6
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (ior (match_operand:V2DI 0 "register_operand")
+ (eq_attr "atom_unit" "complex"))))
+ "atom-complex, atom-all-eu*5")
+
+;; if immediate op.
+(define_insn_reservation "atom_sseishft" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "!sishuf")
+ (match_operand 2 "immediate_operand"))))
+ "atom-simple-either")
+
+;; if palignr or psrldq
+(define_insn_reservation "atom_sseishft_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "sishuf")
+ (match_operand 2 "immediate_operand"))))
+ "atom-simple-0")
+
+;; if reg/mem op
+(define_insn_reservation "atom_sseishft_3" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseishft")
+ (not (match_operand 2 "immediate_operand"))))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_sseimul" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "sseimul"))
+ "atom-simple-0")
+
+;; rcpss or rsqrtss
+(define_insn_reservation "atom_sse" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
+ "atom-fmul-4c")
+
+;; movshdup, movsldup. Suggest to type sseishft
+(define_insn_reservation "atom_sse_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "movdup")))
+ "atom-simple-0")
+
+;; lfence
+(define_insn_reservation "atom_sse_3" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "lfence")))
+ "atom-simple-either")
+
+;; sfence,clflush,mfence, prefetch
+(define_insn_reservation "atom_sse_4" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (ior (eq_attr "atom_sse_attr" "fence")
+ (eq_attr "atom_sse_attr" "prefetch"))))
+ "atom-simple-0")
+
+;; rcpps, rsqrtss, sqrt, ldmxcsr
+(define_insn_reservation "atom_sse_5" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (ior (ior (eq_attr "atom_sse_attr" "sqrt")
+ (eq_attr "atom_sse_attr" "mxcsr"))
+ (and (eq_attr "atom_sse_attr" "rcp")
+ (eq_attr "mode" "V4SF")))))
+ "atom-complex, atom-all-eu*6")
+
+;; xmm->xmm
+(define_insn_reservation "atom_ssemov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy"))))
+ "atom-simple-either")
+
+;; reg->xmm
+(define_insn_reservation "atom_ssemov_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r"))))
+ "atom-simple-0")
+
+;; xmm->reg
+(define_insn_reservation "atom_ssemov_3" 3
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy"))))
+ "atom-eu-0-3-1")
+
+;; mov mem
+(define_insn_reservation "atom_ssemov_4" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
+ "atom-simple-0")
+
+;; movu mem
+(define_insn_reservation "atom_ssemov_5" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
+ "atom-complex, atom-all-eu")
+
+;; no memory simple
+(define_insn_reservation "atom_sseadd" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-fadd-5c")
+
+;; memory simple
+(define_insn_reservation "atom_sseadd_mem" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-dual-5c")
+
+;; maxps, minps, *pd, hadd, hsub
+(define_insn_reservation "atom_sseadd_3" 8
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
+ "atom-complex, atom-all-eu*7")
+
+;; Except dppd/dpps
+(define_insn_reservation "atom_ssemul" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "!SF")))
+ "atom-fmul-5c")
+
+;; Except dppd/dpps, 4 cycle if mulss
+(define_insn_reservation "atom_ssemul_2" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF")))
+ "atom-fmul-4c")
+
+(define_insn_reservation "atom_ssecmp" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssecmp"))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_ssecomi" 10
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssecomi"))
+ "atom-complex, atom-all-eu*9")
+
+;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "atom_ssecvt" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "register_operand")))))
+ "atom-fadd-5c")
+
+;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "atom_ssecvt_2" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "memory_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "memory_operand")))))
+ "atom-dual-5c")
+
+;; otherwise. 7 cycles average for cvtss2sd
+(define_insn_reservation "atom_ssecvt_3" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (not (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "nonimmediate_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "nonimmediate_operand"))))))
+ "atom-complex, atom-all-eu*6")
+
+;; memory and cvtsi2sd
+(define_insn_reservation "atom_sseicvt" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseicvt")
+ (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "memory_operand"))))
+ "atom-dual-5c")
+
+;; otherwise. 8 cycles average for cvtsd2si
+(define_insn_reservation "atom_sseicvt_2" 8
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseicvt")
+ (not (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "memory_operand")))))
+ "atom-complex, atom-all-eu*7")
+
+(define_insn_reservation "atom_ssediv" 62
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssediv"))
+ "atom-complex, atom-all-eu*12, nothing*49")
+
+;; simple for fmov
+(define_insn_reservation "atom_fmov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+;; simple for fmov
+(define_insn_reservation "atom_fmov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; Define bypass here
+
+;; There will be no stall from lea to non-mem EX insns
+(define_bypass 0 "atom_lea"
+ "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec, atom_setcc, atom_icmov, atom_pop")
+
+(define_bypass 0 "atom_lea"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "!ix86_agi_dependent")
+
+;; There will be 3 cycles stall from EX insns to AGAN insns LEA
+(define_bypass 4 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_lea")
+
+;; There will be 3 cycles stall from EX insns to insns need addr calculation
+(define_bypass 4 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imul_mem, atom_icmp_mem,
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_agi_dependent")
+
+;; Stall from imul to lea is 8 cycles.
+(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")
+
+;; Stall from imul to memory address is 8 cycles.
+(define_bypass 9 "atom_imul, atom_imul_mem"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
+ atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
+ atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
+ "ix86_agi_dependent")
+
+;; There will be 0 cycle stall from cmp/test to jcc
+
+;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
+(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
+ atom_alu1, atom_negnot, atom_incdec, atom_ishift,
+ atom_ishift1, atom_rotate, atom_rotate1"
+ "atom_icmov, atom_alu_carry")
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 3d17c10..0c59b2f 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -119,6 +119,10 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__core2");
def_or_undef (parse_in, "__core2__");
break;
+ case PROCESSOR_ATOM:
+ def_or_undef (parse_in, "__atom");
+ def_or_undef (parse_in, "__atom__");
+ break;
/* use PROCESSOR_max to not set/unset the arch macro. */
case PROCESSOR_max:
break;
@@ -187,6 +191,9 @@ ix86_target_macros_internal (int isa_flag,
case PROCESSOR_CORE2:
def_or_undef (parse_in, "__tune_core2__");
break;
+ case PROCESSOR_ATOM:
+ def_or_undef (parse_in, "__tune_atom__");
+ break;
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
break;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 154c324..ea9d35e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1036,6 +1036,79 @@ struct processor_costs core2_cost = {
1, /* cond_not_taken_branch_cost. */
};
+static const
+struct processor_costs atom_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{32, loop}, {64, rep_prefix_4_byte},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {15, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{24, loop}, {32, unrolled_loop},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
/* Generic64 should produce code tuned for Nocona and K8. */
static const
struct processor_costs generic64_cost = {
@@ -1194,6 +1267,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
#define m_PENT4 (1<mode) && TARGET_SSE_MATH"
"%vsqrts\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "")
(set_attr "athlon_decode" "*")
@@ -19850,6 +19884,7 @@
; Since we don't have the proper number of operands for an alu insn,
; fill in all the blanks.
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
@@ -19865,6 +19900,7 @@
""
"sbb{q}\t%0, %0"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
@@ -19908,6 +19944,7 @@
; Since we don't have the proper number of operands for an alu insn,
; fill in all the blanks.
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
@@ -19923,6 +19960,7 @@
""
"sbb{l}\t%0, %0"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
@@ -20255,7 +20293,8 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "0")
+ (cond [(and (eq_attr "alternative" "0")
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
(const_string "alu")
(match_operand:SI 2 "const0_operand" "")
(const_string "imov")
@@ -20298,7 +20337,8 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "0")
+ (cond [(and (eq_attr "alternative" "0")
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
(const_string "alu")
(match_operand:DI 2 "const0_operand" "")
(const_string "imov")
@@ -21790,6 +21830,7 @@
return patterns[locality];
}
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "prefetch")
(set_attr "memory" "none")])
(define_insn "*prefetch_sse_rex"
@@ -21808,6 +21849,7 @@
return patterns[locality];
}
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "prefetch")
(set_attr "memory" "none")])
(define_insn "*prefetch_3dnow"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e6d1fd1..977a23c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -338,6 +338,7 @@
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"vmovup\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
@@ -363,6 +364,7 @@
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"movup\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "mode" "")])
(define_insn "avx_movdqu"
@@ -373,6 +375,7 @@
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"vmovdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
@@ -383,6 +386,7 @@
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"movdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -424,7 +428,7 @@
UNSPEC_MOVNT))]
"TARGET_SSE2"
"movntdq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -434,7 +438,7 @@
UNSPEC_MOVNT))]
"TARGET_SSE2"
"movnti\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "mode" "V2DF")])
(define_insn "avx_lddqu"
@@ -445,6 +449,7 @@
"TARGET_AVX"
"vlddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
@@ -454,7 +459,8 @@
UNSPEC_LDDQU))]
"TARGET_SSE3"
"lddqu\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
@@ -761,6 +767,7 @@
"TARGET_SSE"
"%vrcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
@@ -787,6 +794,7 @@
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
(set_attr "mode" "SF")])
(define_expand "sqrtv8sf2"
@@ -832,6 +840,7 @@
"TARGET_SSE"
"%vsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
@@ -876,6 +885,7 @@
"SSE_VEC_FLOAT_MODE_P (mode)"
"sqrts\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
(set_attr "mode" "")])
(define_expand "rsqrtv8sf2"
@@ -1039,7 +1049,7 @@
(const_int 1)))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"s\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "sseadd")
(set_attr "mode" "")])
;; These versions of the min/max patterns implement exactly the operations
@@ -1175,6 +1185,7 @@
"TARGET_SSE3"
"addsubpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "atom_unit" "complex")
(set_attr "mode" "V2DF")])
(define_insn "avx_hv4df3"
@@ -1298,6 +1309,7 @@
"TARGET_SSE3"
"hps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
@@ -5066,6 +5078,7 @@
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"pmaddwd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -7025,6 +7038,7 @@
movq\t{%H1, %0|%0, %H1}
mov{q}\t{%H1, %0|%0, %H1}"
[(set_attr "type" "ssemov,sseishft,ssemov,imov")
+ (set_attr "atom_unit" "*,sishuf,*,*")
(set_attr "memory" "*,none,*,*")
(set_attr "mode" "V2SF,TI,TI,DI")])
@@ -7057,6 +7071,7 @@
psrldq\t{$8, %0|%0, 8}
movq\t{%H1, %0|%0, %H1}"
[(set_attr "type" "ssemov,sseishft,ssemov")
+ (set_attr "atom_unit" "*,sishuf,*")
(set_attr "memory" "*,none,*")
(set_attr "mode" "V2SF,TI,TI")])
@@ -7614,6 +7629,7 @@
"TARGET_SSE2"
"psadbw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -7635,7 +7651,7 @@
UNSPEC_MOVMSK))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"%vmovmskp\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "")])
@@ -7645,7 +7661,7 @@
UNSPEC_MOVMSK))]
"TARGET_SSE2"
"%vpmovmskb\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
@@ -7668,7 +7684,7 @@
"TARGET_SSE2 && !TARGET_64BIT"
;; @@@ check ordering of operands in intel/nonintel syntax
"%vmaskmovdqu\t{%2, %1|%1, %2}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -7682,7 +7698,7 @@
"TARGET_SSE2 && TARGET_64BIT"
;; @@@ check ordering of operands in intel/nonintel syntax
"%vmaskmovdqu\t{%2, %1|%1, %2}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -7693,6 +7709,7 @@
"TARGET_SSE"
"%vldmxcsr\t%0"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "mxcsr")
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "load")])
@@ -7702,6 +7719,7 @@
"TARGET_SSE"
"%vstmxcsr\t%0"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "mxcsr")
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "store")])
@@ -7720,6 +7738,7 @@
"TARGET_SSE || TARGET_3DNOW_A"
"sfence"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
(define_insn "sse2_clflush"
@@ -7728,6 +7747,7 @@
"TARGET_SSE2"
"clflush\t%a0"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
(define_expand "sse2_mfence"
@@ -7745,6 +7765,7 @@
"TARGET_64BIT || TARGET_SSE2"
"mfence"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
(define_expand "sse2_lfence"
@@ -7762,6 +7783,7 @@
"TARGET_SSE2"
"lfence"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "lfence")
(set_attr "memory" "unknown")])
(define_insn "sse3_mwait"
@@ -7885,6 +7907,7 @@
"TARGET_SSSE3"
"phaddw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -7913,6 +7936,7 @@
"TARGET_SSSE3"
"phaddw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -7967,6 +7991,7 @@
"TARGET_SSSE3"
"phaddd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -7987,6 +8012,7 @@
"TARGET_SSSE3"
"phaddd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8073,6 +8099,7 @@
"TARGET_SSSE3"
"phaddsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8101,6 +8128,7 @@
"TARGET_SSSE3"
"phaddsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8187,6 +8215,7 @@
"TARGET_SSSE3"
"phsubw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8215,6 +8244,7 @@
"TARGET_SSSE3"
"phsubw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8269,6 +8299,7 @@
"TARGET_SSSE3"
"phsubd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8289,6 +8320,7 @@
"TARGET_SSSE3"
"phsubd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8375,6 +8407,7 @@
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8403,6 +8436,7 @@
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8509,6 +8543,7 @@
"TARGET_SSSE3"
"pmaddubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8547,6 +8582,7 @@
"TARGET_SSSE3"
"pmaddubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8754,6 +8790,7 @@
return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
[(set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8770,6 +8807,7 @@
return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
[(set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8956,7 +8994,7 @@
UNSPEC_MOVNTDQA))]
"TARGET_SSE4_1"
"%vmovntdqa\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])