diff options
-rw-r--r-- | 5010_enable-additional-cpu-optimizations-for-gcc.patch | 225 |
1 files changed, 166 insertions, 59 deletions
diff --git a/5010_enable-additional-cpu-optimizations-for-gcc.patch b/5010_enable-additional-cpu-optimizations-for-gcc.patch index d9729b23..76cbd9d2 100644 --- a/5010_enable-additional-cpu-optimizations-for-gcc.patch +++ b/5010_enable-additional-cpu-optimizations-for-gcc.patch @@ -1,33 +1,51 @@ -WARNING - this version of the patch works with version 4.9+ of gcc and with -kernel version 3.15.x+ and should NOT be applied when compiling on older -versions due to name changes of the flags with the 4.9 release of gcc. +WARNING +This patch works with gcc versions 4.9+ and with kernel version 3.15+ and should +NOT be applied when compiling on older versions of gcc due to key name changes +of the march flags introduced with the version 4.9 release of gcc.[1] + Use the older version of this patch hosted on the same github for older -versions of gcc. For example: +versions of gcc. -corei7 --> nehalem -corei7-avx --> sandybridge -core-avx-i --> ivybridge -core-avx2 --> haswell +FEATURES +This patch adds additional CPU options to the Linux kernel accessible under: + Processor type and features ---> + Processor family ---> -For more, see: https://gcc.gnu.org/gcc-4.9/changes.html +The expanded microarchitectures include: +* AMD Improved K8-family +* AMD K10-family +* AMD Family 10h (Barcelona) +* AMD Family 14h (Bobcat) +* AMD Family 16h (Jaguar) +* AMD Family 15h (Bulldozer) +* AMD Family 15h (Piledriver) +* AMD Family 15h (Steamroller) +* AMD Family 15h (Excavator) +* AMD Family 17h (Zen) +* Intel Silvermont low-power processors +* Intel 1st Gen Core i3/i5/i7 (Nehalem) +* Intel 1.5 Gen Core i3/i5/i7 (Westmere) +* Intel 2nd Gen Core i3/i5/i7 (Sandybridge) +* Intel 3rd Gen Core i3/i5/i7 (Ivybridge) +* Intel 4th Gen Core i3/i5/i7 (Haswell) +* Intel 5th Gen Core i3/i5/i7 (Broadwell) +* Intel 6th Gen Core i3/i5.i7 (Skylake) -It also changes 'atom' to 'bonnell' in accordance with the gcc v4.9 changes. -Note that upstream is using the deprecated 'match=atom' flags when I believe it -should use the newer 'march=bonnell' flag for atom processors. +It also offers to compile passing the 'native' option which, "selects the CPU +to generate code for at compilation time by determining the processor type of +the compiling machine. Using -march=native enables all instruction subsets +supported by the local machine and will produce code optimized for the local +machine under the constraints of the selected instruction set."[3] -I have made that change to this patch set as well. See the following kernel -bug report to see if I'm right: https://bugzilla.kernel.org/show_bug.cgi?id=77461 +MINOR NOTES +This patch also changes 'atom' to 'bonnell' in accordance with the gcc v4.9 +changes. Note that upstream is using the deprecated 'match=atom' flags when I +believe it should use the newer 'march=bonnell' flag for atom processors.[2] -This patch will expand the number of microarchitectures to include newer -processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family -14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD -Family 15h (Steamroller), Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 -(Nehalem), Intel 1.5 Gen Core i3/i5/i7 (Westmere), Intel 2nd Gen Core i3/i5/i7 -(Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), Intel 4th Gen Core -i3/i5/i7 (Haswell), Intel 5th Gen Core i3/i5/i7 (Broadwell), and the low power -Silvermont series of Atom processors (Silvermont). It also offers the compiler -the 'native' flag. +It is not recommended to compile on Atom-CPUs with the 'native' option.[4] The +recommendation is use to the 'atom' option instead. +BENEFITS Small but real speed increases are measurable using a make endpoint comparing a generic kernel to one built with one of the respective microarchs. @@ -38,8 +56,18 @@ REQUIREMENTS linux version >=3.15 gcc version >=4.9 ---- a/arch/x86/include/asm/module.h 2015-08-30 14:34:09.000000000 -0400 -+++ b/arch/x86/include/asm/module.h 2015-11-06 14:18:24.234941036 -0500 +ACKNOWLEDGMENTS +This patch builds on the seminal work by Jeroen.[5] + +REFERENCES +1. https://gcc.gnu.org/gcc-4.9/changes.html +2. https://bugzilla.kernel.org/show_bug.cgi?id=77461 +3. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html +4. https://github.com/graysky2/kernel_gcc_patch/issues/15 +5. http://www.linuxforge.net/docs/linux/linux-gcc.php + +--- a/arch/x86/include/asm/module.h 2016-12-11 14:17:54.000000000 -0500 ++++ b/arch/x86/include/asm/module.h 2017-01-06 20:44:36.602227264 -0500 @@ -15,6 +15,24 @@ #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 @@ -65,7 +93,7 @@ gcc version >=4.9 #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 -@@ -33,6 +51,22 @@ +@@ -33,6 +51,26 @@ #define MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " @@ -80,17 +108,29 @@ gcc version >=4.9 +#elif defined CONFIG_MBULLDOZER +#define MODULE_PROC_FAMILY "BULLDOZER " +#elif defined CONFIG_MPILEDRIVER -+#define MODULE_PROC_FAMILY "STEAMROLLER " -+#elif defined CONFIG_MSTEAMROLLER +#define MODULE_PROC_FAMILY "PILEDRIVER " ++#elif defined CONFIG_MSTEAMROLLER ++#define MODULE_PROC_FAMILY "STEAMROLLER " +#elif defined CONFIG_MJAGUAR +#define MODULE_PROC_FAMILY "JAGUAR " ++#elif defined CONFIG_MEXCAVATOR ++#define MODULE_PROC_FAMILY "EXCAVATOR " ++#elif defined CONFIG_MZEN ++#define MODULE_PROC_FAMILY "ZEN " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE ---- a/arch/x86/Kconfig.cpu 2015-08-30 14:34:09.000000000 -0400 -+++ b/arch/x86/Kconfig.cpu 2015-11-06 14:20:14.948369244 -0500 -@@ -137,9 +137,8 @@ config MPENTIUM4 +--- a/arch/x86/Kconfig.cpu 2016-12-11 14:17:54.000000000 -0500 ++++ b/arch/x86/Kconfig.cpu 2017-01-06 20:46:14.004109597 -0500 +@@ -115,6 +115,7 @@ config MPENTIUMM + config MPENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" + depends on X86_32 ++ select X86_P6_NOP + ---help--- + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, Pentium D, P4-based Celeron and Xeon, and +@@ -147,9 +148,8 @@ config MPENTIUM4 -Paxville -Dempsey @@ -101,7 +141,7 @@ gcc version >=4.9 depends on X86_32 ---help--- Select this for an AMD K6-family processor. Enables use of -@@ -147,7 +146,7 @@ config MK6 +@@ -157,7 +157,7 @@ config MK6 flags to GCC. config MK7 @@ -110,7 +150,7 @@ gcc version >=4.9 depends on X86_32 ---help--- Select this for an AMD Athlon K7-family processor. Enables use of -@@ -155,12 +154,69 @@ config MK7 +@@ -165,12 +165,83 @@ config MK7 flags to GCC. config MK8 @@ -139,54 +179,77 @@ gcc version >=4.9 +config MBARCELONA + bool "AMD Barcelona" + ---help--- -+ Select this for AMD Barcelona and newer processors. ++ Select this for AMD Family 10h Barcelona processors. + + Enables -march=barcelona + +config MBOBCAT + bool "AMD Bobcat" + ---help--- -+ Select this for AMD Bobcat processors. ++ Select this for AMD Family 14h Bobcat processors. + + Enables -march=btver1 + ++config MJAGUAR ++ bool "AMD Jaguar" ++ ---help--- ++ Select this for AMD Family 16h Jaguar processors. ++ ++ Enables -march=btver2 ++ +config MBULLDOZER + bool "AMD Bulldozer" + ---help--- -+ Select this for AMD Bulldozer processors. ++ Select this for AMD Family 15h Bulldozer processors. + + Enables -march=bdver1 + +config MPILEDRIVER + bool "AMD Piledriver" + ---help--- -+ Select this for AMD Piledriver processors. ++ Select this for AMD Family 15h Piledriver processors. + + Enables -march=bdver2 + +config MSTEAMROLLER + bool "AMD Steamroller" + ---help--- -+ Select this for AMD Steamroller processors. ++ Select this for AMD Family 15h Steamroller processors. + + Enables -march=bdver3 + -+config MJAGUAR -+ bool "AMD Jaguar" ++config MEXCAVATOR ++ bool "AMD Excavator" + ---help--- -+ Select this for AMD Jaguar processors. ++ Select this for AMD Family 15h Excavator processors. + -+ Enables -march=btver2 ++ Enables -march=bdver4 ++ ++config MZEN ++ bool "AMD Zen" ++ ---help--- ++ Select this for AMD Family 17h Zen processors. ++ ++ Enables -march=znver1 + config MCRUSOE bool "Crusoe" depends on X86_32 -@@ -251,8 +307,17 @@ config MPSC +@@ -252,6 +323,7 @@ config MVIAC7 + + config MPSC + bool "Intel P4 / older Netburst based Xeon" ++ select X86_P6_NOP + depends on X86_64 + ---help--- + Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey +@@ -261,8 +333,19 @@ config MPSC using the cpu family field in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. +config MATOM + bool "Intel Atom" ++ select X86_P6_NOP + ---help--- + + Select this for the Intel Atom platform. Intel Atom CPUs have an @@ -197,10 +260,11 @@ gcc version >=4.9 config MCORE2 - bool "Core 2/newer Xeon" + bool "Intel Core 2" ++ select X86_P6_NOP ---help--- Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and -@@ -260,14 +325,71 @@ config MCORE2 +@@ -270,14 +353,79 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) @@ -210,6 +274,7 @@ gcc version >=4.9 + +config MNEHALEM + bool "Intel Nehalem" ++ select X86_P6_NOP ---help--- - Select this for the Intel Atom platform. Intel Atom CPUs have an @@ -222,6 +287,7 @@ gcc version >=4.9 + +config MWESTMERE + bool "Intel Westmere" ++ select X86_P6_NOP + ---help--- + + Select this for the Intel Westmere formerly Nehalem-C family. @@ -230,6 +296,7 @@ gcc version >=4.9 + +config MSILVERMONT + bool "Intel Silvermont" ++ select X86_P6_NOP + ---help--- + + Select this for the Intel Silvermont platform. @@ -238,6 +305,7 @@ gcc version >=4.9 + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" ++ select X86_P6_NOP + ---help--- + + Select this for 2nd Gen Core processors in the Sandy Bridge family. @@ -246,6 +314,7 @@ gcc version >=4.9 + +config MIVYBRIDGE + bool "Intel Ivy Bridge" ++ select X86_P6_NOP + ---help--- + + Select this for 3rd Gen Core processors in the Ivy Bridge family. @@ -254,6 +323,7 @@ gcc version >=4.9 + +config MHASWELL + bool "Intel Haswell" ++ select X86_P6_NOP + ---help--- + + Select this for 4th Gen Core processors in the Haswell family. @@ -262,6 +332,7 @@ gcc version >=4.9 + +config MBROADWELL + bool "Intel Broadwell" ++ select X86_P6_NOP + ---help--- + + Select this for 5th Gen Core processors in the Broadwell family. @@ -270,6 +341,7 @@ gcc version >=4.9 + +config MSKYLAKE + bool "Intel Skylake" ++ select X86_P6_NOP + ---help--- + + Select this for 6th Gen Core processors in the Skylake family. @@ -278,7 +350,7 @@ gcc version >=4.9 config GENERIC_CPU bool "Generic-x86-64" -@@ -276,6 +398,19 @@ config GENERIC_CPU +@@ -286,6 +434,19 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. @@ -298,16 +370,16 @@ gcc version >=4.9 endchoice config X86_GENERIC -@@ -300,7 +435,7 @@ config X86_INTERNODE_CACHE_SHIFT +@@ -310,7 +471,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU -+ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU ++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU default "4" if MELAN || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX -@@ -331,11 +466,11 @@ config X86_ALIGNMENT_16 +@@ -341,45 +502,46 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y @@ -321,7 +393,38 @@ gcc version >=4.9 config X86_USE_3DNOW def_bool y -@@ -359,17 +494,17 @@ config X86_P6_NOP + depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML + +-# +-# P6_NOPs are a relatively minor optimization that require a family >= +-# 6 processor, except that it is broken on certain VIA chips. +-# Furthermore, AMD chips prefer a totally different sequence of NOPs +-# (which work on all CPUs). In addition, it looks like Virtual PC +-# does not understand them. +-# +-# As a result, disallow these if we're not compiling for X86_64 (these +-# NOPs do work on all x86-64 capable chips); the list of processors in +-# the right-hand clause are the cores that benefit from this optimization. +-# + config X86_P6_NOP +- def_bool y +- depends on X86_64 +- depends on (MCORE2 || MPENTIUM4 || MPSC) ++ default n ++ bool "Support for P6_NOPs on Intel chips" ++ depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE) ++ ---help--- ++ P6_NOPs are a relatively minor optimization that require a family >= ++ 6 processor, except that it is broken on certain VIA chips. ++ Furthermore, AMD chips prefer a totally different sequence of NOPs ++ (which work on all CPUs). In addition, it looks like Virtual PC ++ does not understand them. ++ ++ As a result, disallow these if we're not compiling for X86_64 (these ++ NOPs do work on all x86-64 capable chips); the list of processors in ++ the right-hand clause are the cores that benefit from this optimization. ++ ++ Say Y if you have Intel CPU newer than Pentium Pro, N otherwise. config X86_TSC def_bool y @@ -338,13 +441,13 @@ gcc version >=4.9 config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) -+ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) ++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int ---- a/arch/x86/Makefile 2015-08-30 14:34:09.000000000 -0400 -+++ b/arch/x86/Makefile 2015-11-06 14:21:05.708983344 -0500 -@@ -94,13 +94,38 @@ else +--- a/arch/x86/Makefile 2016-12-11 14:17:54.000000000 -0500 ++++ b/arch/x86/Makefile 2017-01-06 20:44:36.603227283 -0500 +@@ -104,13 +104,40 @@ else KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) @@ -354,10 +457,12 @@ gcc version >=4.9 + cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10) + cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) + cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) ++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) + cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) + cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) + cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3) -+ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) ++ cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4) ++ cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) cflags-$(CONFIG_MCORE2) += \ @@ -386,9 +491,9 @@ gcc version >=4.9 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) KBUILD_CFLAGS += $(cflags-y) ---- a/arch/x86/Makefile_32.cpu 2015-08-30 14:34:09.000000000 -0400 -+++ b/arch/x86/Makefile_32.cpu 2015-11-06 14:21:43.604429077 -0500 -@@ -23,7 +23,16 @@ cflags-$(CONFIG_MK6) += -march=k6 +--- a/arch/x86/Makefile_32.cpu 2016-12-11 14:17:54.000000000 -0500 ++++ b/arch/x86/Makefile_32.cpu 2017-01-06 20:44:36.603227283 -0500 +@@ -23,7 +23,18 @@ cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. cflags-$(CONFIG_MK7) += -march=athlon @@ -398,14 +503,16 @@ gcc version >=4.9 +cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon) +cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon) +cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon) ++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon) +cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon) +cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon) +cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon) -+cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon) ++cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4,-march=athlon) ++cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1,-march=athlon) cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) -@@ -32,8 +41,16 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc- +@@ -32,8 +43,16 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc- cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7) += -march=i686 cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) |