summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- 5010_enable-additional-cpu-optimizations-for-gcc.patch 225
1 files changed, 166 insertions, 59 deletions
diff --git a/5010_enable-additional-cpu-optimizations-for-gcc.patch b/5010_enable-additional-cpu-optimizations-for-gcc.patch
index d9729b23..76cbd9d2 100644
--- a/5010_enable-additional-cpu-optimizations-for-gcc.patch
+++ b/5010_enable-additional-cpu-optimizations-for-gcc.patch
@@ -1,33 +1,51 @@
-WARNING - this version of the patch works with version 4.9+ of gcc and with
-kernel version 3.15.x+ and should NOT be applied when compiling on older
-versions due to name changes of the flags with the 4.9 release of gcc.
+WARNING
+This patch works with gcc versions 4.9+ and with kernel version 3.15+ and should
+NOT be applied when compiling on older versions of gcc due to key name changes
+of the march flags introduced with the version 4.9 release of gcc.[1]
+
Use the older version of this patch hosted on the same github for older
-versions of gcc. For example:
+versions of gcc.
-corei7 --> nehalem
-corei7-avx --> sandybridge
-core-avx-i --> ivybridge
-core-avx2 --> haswell
+FEATURES
+This patch adds additional CPU options to the Linux kernel accessible under:
+ Processor type and features --->
+ Processor family --->
-For more, see: https://gcc.gnu.org/gcc-4.9/changes.html
+The expanded microarchitectures include:
+* AMD Improved K8-family
+* AMD K10-family
+* AMD Family 10h (Barcelona)
+* AMD Family 14h (Bobcat)
+* AMD Family 16h (Jaguar)
+* AMD Family 15h (Bulldozer)
+* AMD Family 15h (Piledriver)
+* AMD Family 15h (Steamroller)
+* AMD Family 15h (Excavator)
+* AMD Family 17h (Zen)
+* Intel Silvermont low-power processors
+* Intel 1st Gen Core i3/i5/i7 (Nehalem)
+* Intel 1.5 Gen Core i3/i5/i7 (Westmere)
+* Intel 2nd Gen Core i3/i5/i7 (Sandybridge)
+* Intel 3rd Gen Core i3/i5/i7 (Ivybridge)
+* Intel 4th Gen Core i3/i5/i7 (Haswell)
+* Intel 5th Gen Core i3/i5/i7 (Broadwell)
+* Intel 6th Gen Core i3/i5/i7 (Skylake)
-It also changes 'atom' to 'bonnell' in accordance with the gcc v4.9 changes.
-Note that upstream is using the deprecated 'match=atom' flags when I believe it
-should use the newer 'march=bonnell' flag for atom processors.
+It also offers compiling with the 'native' option, which "selects the CPU
+to generate code for at compilation time by determining the processor type of
+the compiling machine. Using -march=native enables all instruction subsets
+supported by the local machine and will produce code optimized for the local
+machine under the constraints of the selected instruction set."[3]
-I have made that change to this patch set as well. See the following kernel
-bug report to see if I'm right: https://bugzilla.kernel.org/show_bug.cgi?id=77461
+MINOR NOTES
+This patch also changes 'atom' to 'bonnell' in accordance with the gcc v4.9
+changes. Note that upstream is using the deprecated 'march=atom' flags when I
+believe it should use the newer 'march=bonnell' flag for atom processors.[2]
-This patch will expand the number of microarchitectures to include newer
-processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
-14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
-Family 15h (Steamroller), Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7
-(Nehalem), Intel 1.5 Gen Core i3/i5/i7 (Westmere), Intel 2nd Gen Core i3/i5/i7
-(Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), Intel 4th Gen Core
-i3/i5/i7 (Haswell), Intel 5th Gen Core i3/i5/i7 (Broadwell), and the low power
-Silvermont series of Atom processors (Silvermont). It also offers the compiler
-the 'native' flag.
+It is not recommended to compile on Atom-CPUs with the 'native' option.[4] The
+recommendation is to use the 'atom' option instead.
+BENEFITS
Small but real speed increases are measurable using a make endpoint comparing
a generic kernel to one built with one of the respective microarchs.
@@ -38,8 +56,18 @@ REQUIREMENTS
linux version >=3.15
gcc version >=4.9
---- a/arch/x86/include/asm/module.h 2015-08-30 14:34:09.000000000 -0400
-+++ b/arch/x86/include/asm/module.h 2015-11-06 14:18:24.234941036 -0500
+ACKNOWLEDGMENTS
+This patch builds on the seminal work by Jeroen.[5]
+
+REFERENCES
+1. https://gcc.gnu.org/gcc-4.9/changes.html
+2. https://bugzilla.kernel.org/show_bug.cgi?id=77461
+3. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
+4. https://github.com/graysky2/kernel_gcc_patch/issues/15
+5. http://www.linuxforge.net/docs/linux/linux-gcc.php
+
+--- a/arch/x86/include/asm/module.h 2016-12-11 14:17:54.000000000 -0500
++++ b/arch/x86/include/asm/module.h 2017-01-06 20:44:36.602227264 -0500
@@ -15,6 +15,24 @@
#define MODULE_PROC_FAMILY "586MMX "
#elif defined CONFIG_MCORE2
@@ -65,7 +93,7 @@ gcc version >=4.9
#elif defined CONFIG_MATOM
#define MODULE_PROC_FAMILY "ATOM "
#elif defined CONFIG_M686
-@@ -33,6 +51,22 @@
+@@ -33,6 +51,26 @@
#define MODULE_PROC_FAMILY "K7 "
#elif defined CONFIG_MK8
#define MODULE_PROC_FAMILY "K8 "
@@ -80,17 +108,29 @@ gcc version >=4.9
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
-+#define MODULE_PROC_FAMILY "STEAMROLLER "
-+#elif defined CONFIG_MSTEAMROLLER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
++#elif defined CONFIG_MSTEAMROLLER
++#define MODULE_PROC_FAMILY "STEAMROLLER "
+#elif defined CONFIG_MJAGUAR
+#define MODULE_PROC_FAMILY "JAGUAR "
++#elif defined CONFIG_MEXCAVATOR
++#define MODULE_PROC_FAMILY "EXCAVATOR "
++#elif defined CONFIG_MZEN
++#define MODULE_PROC_FAMILY "ZEN "
#elif defined CONFIG_MELAN
#define MODULE_PROC_FAMILY "ELAN "
#elif defined CONFIG_MCRUSOE
---- a/arch/x86/Kconfig.cpu 2015-08-30 14:34:09.000000000 -0400
-+++ b/arch/x86/Kconfig.cpu 2015-11-06 14:20:14.948369244 -0500
-@@ -137,9 +137,8 @@ config MPENTIUM4
+--- a/arch/x86/Kconfig.cpu 2016-12-11 14:17:54.000000000 -0500
++++ b/arch/x86/Kconfig.cpu 2017-01-06 20:46:14.004109597 -0500
+@@ -115,6 +115,7 @@ config MPENTIUMM
+ config MPENTIUM4
+ bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
+ depends on X86_32
++ select X86_P6_NOP
+ ---help---
+ Select this for Intel Pentium 4 chips. This includes the
+ Pentium 4, Pentium D, P4-based Celeron and Xeon, and
+@@ -147,9 +148,8 @@ config MPENTIUM4
-Paxville
-Dempsey
@@ -101,7 +141,7 @@ gcc version >=4.9
depends on X86_32
---help---
Select this for an AMD K6-family processor. Enables use of
-@@ -147,7 +146,7 @@ config MK6
+@@ -157,7 +157,7 @@ config MK6
flags to GCC.
config MK7
@@ -110,7 +150,7 @@ gcc version >=4.9
depends on X86_32
---help---
Select this for an AMD Athlon K7-family processor. Enables use of
-@@ -155,12 +154,69 @@ config MK7
+@@ -165,12 +165,83 @@ config MK7
flags to GCC.
config MK8
@@ -139,54 +179,77 @@ gcc version >=4.9
+config MBARCELONA
+ bool "AMD Barcelona"
+ ---help---
-+ Select this for AMD Barcelona and newer processors.
++ Select this for AMD Family 10h Barcelona processors.
+
+ Enables -march=barcelona
+
+config MBOBCAT
+ bool "AMD Bobcat"
+ ---help---
-+ Select this for AMD Bobcat processors.
++ Select this for AMD Family 14h Bobcat processors.
+
+ Enables -march=btver1
+
++config MJAGUAR
++ bool "AMD Jaguar"
++ ---help---
++ Select this for AMD Family 16h Jaguar processors.
++
++ Enables -march=btver2
++
+config MBULLDOZER
+ bool "AMD Bulldozer"
+ ---help---
-+ Select this for AMD Bulldozer processors.
++ Select this for AMD Family 15h Bulldozer processors.
+
+ Enables -march=bdver1
+
+config MPILEDRIVER
+ bool "AMD Piledriver"
+ ---help---
-+ Select this for AMD Piledriver processors.
++ Select this for AMD Family 15h Piledriver processors.
+
+ Enables -march=bdver2
+
+config MSTEAMROLLER
+ bool "AMD Steamroller"
+ ---help---
-+ Select this for AMD Steamroller processors.
++ Select this for AMD Family 15h Steamroller processors.
+
+ Enables -march=bdver3
+
-+config MJAGUAR
-+ bool "AMD Jaguar"
++config MEXCAVATOR
++ bool "AMD Excavator"
+ ---help---
-+ Select this for AMD Jaguar processors.
++ Select this for AMD Family 15h Excavator processors.
+
-+ Enables -march=btver2
++ Enables -march=bdver4
++
++config MZEN
++ bool "AMD Zen"
++ ---help---
++ Select this for AMD Family 17h Zen processors.
++
++ Enables -march=znver1
+
config MCRUSOE
bool "Crusoe"
depends on X86_32
-@@ -251,8 +307,17 @@ config MPSC
+@@ -252,6 +323,7 @@ config MVIAC7
+
+ config MPSC
+ bool "Intel P4 / older Netburst based Xeon"
++ select X86_P6_NOP
+ depends on X86_64
+ ---help---
+ Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
+@@ -261,8 +333,19 @@ config MPSC
using the cpu family field
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+config MATOM
+ bool "Intel Atom"
++ select X86_P6_NOP
+ ---help---
+
+ Select this for the Intel Atom platform. Intel Atom CPUs have an
@@ -197,10 +260,11 @@ gcc version >=4.9
config MCORE2
- bool "Core 2/newer Xeon"
+ bool "Intel Core 2"
++ select X86_P6_NOP
---help---
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
-@@ -260,14 +325,71 @@ config MCORE2
+@@ -270,14 +353,79 @@ config MCORE2
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
(not a typo)
@@ -210,6 +274,7 @@ gcc version >=4.9
+
+config MNEHALEM
+ bool "Intel Nehalem"
++ select X86_P6_NOP
---help---
- Select this for the Intel Atom platform. Intel Atom CPUs have an
@@ -222,6 +287,7 @@ gcc version >=4.9
+
+config MWESTMERE
+ bool "Intel Westmere"
++ select X86_P6_NOP
+ ---help---
+
+ Select this for the Intel Westmere formerly Nehalem-C family.
@@ -230,6 +296,7 @@ gcc version >=4.9
+
+config MSILVERMONT
+ bool "Intel Silvermont"
++ select X86_P6_NOP
+ ---help---
+
+ Select this for the Intel Silvermont platform.
@@ -238,6 +305,7 @@ gcc version >=4.9
+
+config MSANDYBRIDGE
+ bool "Intel Sandy Bridge"
++ select X86_P6_NOP
+ ---help---
+
+ Select this for 2nd Gen Core processors in the Sandy Bridge family.
@@ -246,6 +314,7 @@ gcc version >=4.9
+
+config MIVYBRIDGE
+ bool "Intel Ivy Bridge"
++ select X86_P6_NOP
+ ---help---
+
+ Select this for 3rd Gen Core processors in the Ivy Bridge family.
@@ -254,6 +323,7 @@ gcc version >=4.9
+
+config MHASWELL
+ bool "Intel Haswell"
++ select X86_P6_NOP
+ ---help---
+
+ Select this for 4th Gen Core processors in the Haswell family.
@@ -262,6 +332,7 @@ gcc version >=4.9
+
+config MBROADWELL
+ bool "Intel Broadwell"
++ select X86_P6_NOP
+ ---help---
+
+ Select this for 5th Gen Core processors in the Broadwell family.
@@ -270,6 +341,7 @@ gcc version >=4.9
+
+config MSKYLAKE
+ bool "Intel Skylake"
++ select X86_P6_NOP
+ ---help---
+
+ Select this for 6th Gen Core processors in the Skylake family.
@@ -278,7 +350,7 @@ gcc version >=4.9
config GENERIC_CPU
bool "Generic-x86-64"
-@@ -276,6 +398,19 @@ config GENERIC_CPU
+@@ -286,6 +434,19 @@ config GENERIC_CPU
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.
@@ -298,16 +370,16 @@ gcc version >=4.9
endchoice
config X86_GENERIC
-@@ -300,7 +435,7 @@ config X86_INTERNODE_CACHE_SHIFT
+@@ -310,7 +471,7 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-+ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-@@ -331,11 +466,11 @@ config X86_ALIGNMENT_16
+@@ -341,45 +502,46 @@ config X86_ALIGNMENT_16
config X86_INTEL_USERCOPY
def_bool y
@@ -321,7 +393,38 @@ gcc version >=4.9
config X86_USE_3DNOW
def_bool y
-@@ -359,17 +494,17 @@ config X86_P6_NOP
+ depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
+
+-#
+-# P6_NOPs are a relatively minor optimization that require a family >=
+-# 6 processor, except that it is broken on certain VIA chips.
+-# Furthermore, AMD chips prefer a totally different sequence of NOPs
+-# (which work on all CPUs). In addition, it looks like Virtual PC
+-# does not understand them.
+-#
+-# As a result, disallow these if we're not compiling for X86_64 (these
+-# NOPs do work on all x86-64 capable chips); the list of processors in
+-# the right-hand clause are the cores that benefit from this optimization.
+-#
+ config X86_P6_NOP
+- def_bool y
+- depends on X86_64
+- depends on (MCORE2 || MPENTIUM4 || MPSC)
++ default n
++ bool "Support for P6_NOPs on Intel chips"
++ depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE)
++ ---help---
++ P6_NOPs are a relatively minor optimization that require a family >=
++ 6 processor, except that it is broken on certain VIA chips.
++ Furthermore, AMD chips prefer a totally different sequence of NOPs
++ (which work on all CPUs). In addition, it looks like Virtual PC
++ does not understand them.
++
++ As a result, disallow these if we're not compiling for X86_64 (these
++ NOPs do work on all x86-64 capable chips); the list of processors in
++ the right-hand clause are the cores that benefit from this optimization.
++
++ Say Y if you have Intel CPU newer than Pentium Pro, N otherwise.
config X86_TSC
def_bool y
@@ -338,13 +441,13 @@ gcc version >=4.9
config X86_CMOV
def_bool y
- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
-+ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
config X86_MINIMUM_CPU_FAMILY
int
---- a/arch/x86/Makefile 2015-08-30 14:34:09.000000000 -0400
-+++ b/arch/x86/Makefile 2015-11-06 14:21:05.708983344 -0500
-@@ -94,13 +94,38 @@ else
+--- a/arch/x86/Makefile 2016-12-11 14:17:54.000000000 -0500
++++ b/arch/x86/Makefile 2017-01-06 20:44:36.603227283 -0500
+@@ -104,13 +104,40 @@ else
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
@@ -354,10 +457,12 @@ gcc version >=4.9
+ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
+ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
+ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
+ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
+ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
+ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3)
-+ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
++ cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4)
++ cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
cflags-$(CONFIG_MCORE2) += \
@@ -386,9 +491,9 @@ gcc version >=4.9
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
KBUILD_CFLAGS += $(cflags-y)
---- a/arch/x86/Makefile_32.cpu 2015-08-30 14:34:09.000000000 -0400
-+++ b/arch/x86/Makefile_32.cpu 2015-11-06 14:21:43.604429077 -0500
-@@ -23,7 +23,16 @@ cflags-$(CONFIG_MK6) += -march=k6
+--- a/arch/x86/Makefile_32.cpu 2016-12-11 14:17:54.000000000 -0500
++++ b/arch/x86/Makefile_32.cpu 2017-01-06 20:44:36.603227283 -0500
+@@ -23,7 +23,18 @@ cflags-$(CONFIG_MK6) += -march=k6
# Please note, that patches that add -march=athlon-xp and friends are pointless.
# They make zero difference whatsosever to performance at this time.
cflags-$(CONFIG_MK7) += -march=athlon
@@ -398,14 +503,16 @@ gcc version >=4.9
+cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
+cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
+cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
+cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
+cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
+cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon)
-+cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
++cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4,-march=athlon)
++cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1,-march=athlon)
cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
-@@ -32,8 +41,16 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
+@@ -32,8 +43,16 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
cflags-$(CONFIG_MVIAC7) += -march=i686
cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)