powerpc: Add support for popcnt instructions

POWER5 added popcntb, and POWER7 added popcntw and popcntd. As a first step
this patch does all the work out of line, but it would be nice to implement
them as inlines with an out of line fallback.

The performance issue with hweight was noticed when disabling SMT on a large
(192 thread) POWER7 box. The patch improves that testcase by about 8%.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Anton Blanchard
2010-08-12 16:28:09 +00:00
committed by Benjamin Herrenschmidt
parent 7208364652
commit 64ff312876
5 changed files with 133 additions and 4 deletions
+1 -1
View File
@@ -16,7 +16,7 @@ obj-$(CONFIG_HAS_IOMEM) += devres.o
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
memcpy_64.o usercopy_64.o mem_64.o string.o \
checksum_wrappers_64.o
checksum_wrappers_64.o hweight_64.o
obj-$(CONFIG_XMON) += sstep.o ldstfp.o
obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o
+110
View File
@@ -0,0 +1,110 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2010
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
/* Note: This code relies on -mminimal-toc */
_GLOBAL(__arch_hweight8)
BEGIN_FTR_SECTION
b .__sw_hweight8
nop
nop
FTR_SECTION_ELSE
popcntb r3,r3
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
_GLOBAL(__arch_hweight16)
BEGIN_FTR_SECTION
b .__sw_hweight16
nop
nop
nop
nop
FTR_SECTION_ELSE
BEGIN_FTR_SECTION_NESTED(50)
popcntb r3,r3
srdi r4,r3,8
add r3,r4,r3
clrldi r3,r3,64-8
blr
FTR_SECTION_ELSE_NESTED(50)
clrlwi r3,r3,16
popcntw r3,r3
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
_GLOBAL(__arch_hweight32)
BEGIN_FTR_SECTION
b .__sw_hweight32
nop
nop
nop
nop
nop
nop
FTR_SECTION_ELSE
BEGIN_FTR_SECTION_NESTED(51)
popcntb r3,r3
srdi r4,r3,16
add r3,r4,r3
srdi r4,r3,8
add r3,r4,r3
clrldi r3,r3,64-8
blr
FTR_SECTION_ELSE_NESTED(51)
popcntw r3,r3
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
_GLOBAL(__arch_hweight64)
BEGIN_FTR_SECTION
b .__sw_hweight64
nop
nop
nop
nop
nop
nop
nop
nop
FTR_SECTION_ELSE
BEGIN_FTR_SECTION_NESTED(52)
popcntb r3,r3
srdi r4,r3,32
add r3,r4,r3
srdi r4,r3,16
add r3,r4,r3
srdi r4,r3,8
add r3,r4,r3
clrldi r3,r3,64-8
blr
FTR_SECTION_ELSE_NESTED(52)
popcntd r3,r3
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)