mirror of
https://github.com/asterisk/asterisk.git
synced 2025-09-02 19:16:15 +00:00
Simplify build system architecture optimization
This change to the build system rips out any usage of PROC along with architecture-specific optimizations in favor of using -march=native where it is supported. This fixes broken builds on 64-bit Intel systems and results in better-optimized code on systems running GCC 4.2+.
Review: https://reviewboard.asterisk.org/r/1852/
(closes issue ASTERISK-19462)
........
Merged revisions 361955 from http://svn.asterisk.org/svn/asterisk/branches/1.8
........
Merged revisions 361956 from http://svn.asterisk.org/svn/asterisk/branches/10
git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@361968 65c4cc65-6c06-0410-ace0-fbb531ad65f3
This commit is contained in:
50
Makefile
50
Makefile
@@ -66,7 +66,6 @@ export ASTCONFPATH
|
||||
export ASTKEYDIR
|
||||
|
||||
export OSARCH # Operating system
|
||||
export PROC # Processor type
|
||||
|
||||
export NOISY_BUILD # Used in Makefile.rules
|
||||
export MENUSELECT_CFLAGS # Options selected in menuselect.
|
||||
@@ -170,36 +169,6 @@ OTHER_SUBDIR_CFLAGS="-I$(ASTTOPDIR)/include"
|
||||
OPTIONS=
|
||||
|
||||
ifeq ($(OSARCH),linux-gnu)
|
||||
ifeq ($(PROC),x86_64)
|
||||
# You must have GCC 3.4 to use k8, otherwise use athlon
|
||||
PROC=k8
|
||||
#PROC=athlon
|
||||
endif
|
||||
|
||||
ifeq ($(PROC),sparc64)
|
||||
#The problem with sparc is the best stuff is in newer versions of gcc (post 3.0) only.
|
||||
#This works for even old (2.96) versions of gcc and provides a small boost either way.
|
||||
#A ultrasparc cpu is really v9 but the stock debian stable 3.0 gcc doesn't support it.
|
||||
#So we go lowest common available by gcc and go a step down, still a step up from
|
||||
#the default as we now have a better instruction set to work with. - Belgarath
|
||||
PROC=ultrasparc
|
||||
OPTIONS+=$(shell if $(CC) -mtune=$(PROC) -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mtune=$(PROC)"; fi)
|
||||
OPTIONS+=$(shell if $(CC) -mcpu=v8 -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mcpu=v8"; fi)
|
||||
OPTIONS+=-fomit-frame-pointer
|
||||
endif
|
||||
|
||||
ifeq ($(PROC),arm)
|
||||
# The Cirrus logic is the only heavily shipping arm processor with a real floating point unit
|
||||
ifeq ($(SUB_PROC),maverick)
|
||||
OPTIONS+=-fsigned-char -mcpu=ep9312
|
||||
else
|
||||
ifeq ($(SUB_PROC),xscale)
|
||||
OPTIONS+=-fsigned-char -mcpu=xscale
|
||||
else
|
||||
OPTIONS+=-fsigned-char
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
# flag to tell 'ldconfig' to only process specified directories
|
||||
LDCONFIG_FLAGS=-n
|
||||
endif
|
||||
@@ -235,26 +204,7 @@ ifneq ($(findstring BSD,$(OSARCH)),)
|
||||
_ASTCFLAGS+=-isystem /usr/local/include
|
||||
endif
|
||||
|
||||
ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),)
|
||||
ifneq ($(AST_MARCH_NATIVE),)
|
||||
_ASTCFLAGS+=$(AST_MARCH_NATIVE)
|
||||
else
|
||||
ifneq ($(PROC),ultrasparc)
|
||||
_ASTCFLAGS+=$(shell if $(CC) -march=$(PROC) -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-march=$(PROC)"; fi)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(PROC),ppc)
|
||||
_ASTCFLAGS+=-fsigned-char
|
||||
endif
|
||||
|
||||
ifeq ($(OSARCH),FreeBSD)
|
||||
ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),)
|
||||
ifeq ($(PROC),i386)
|
||||
_ASTCFLAGS+=-march=i686
|
||||
endif
|
||||
endif
|
||||
# -V is understood by BSD Make, not by GNU make.
|
||||
BSDVERSION=$(shell make -V OSVERSION -f /usr/share/mk/bsd.port.subdir.mk)
|
||||
_ASTCFLAGS+=$(shell if test $(BSDVERSION) -lt 500016 ; then echo "-D_THREAD_SAFE"; fi)
|
||||
|
@@ -84,6 +84,10 @@ ifeq ($(findstring DONT_OPTIMIZE,$(MENUSELECT_CFLAGS))$(AST_DEVMODE),DONT_OPTIMI
|
||||
COMPILE_DOUBLE=yes
|
||||
endif
|
||||
|
||||
ifeq ($(findstring BUILD_NATIVE,$(MENUSELECT_CFLAGS)),BUILD_NATIVE)
|
||||
_ASTCFLAGS+=-march=native
|
||||
endif
|
||||
|
||||
%.o: %.s
|
||||
$(ECHO_PREFIX) echo " [AS] $< -> $@"
|
||||
ifeq ($(COMPILE_DOUBLE),yes)
|
||||
|
@@ -71,4 +71,9 @@
|
||||
<member name="INTEGER_CALLERID" displayname="Use the (less accurate) integer-based method for decoding FSK tones (for embedded systems)">
|
||||
<support_level>extended</support_level>
|
||||
</member>
|
||||
<member name="BUILD_NATIVE" displayname="Allow compiler to generate code optimized for the CPU on which the build is performed.">
|
||||
<support_level>core</support_level>
|
||||
<defaultenabled>yes</defaultenabled>
|
||||
<depend>native_arch</depend>
|
||||
</member>
|
||||
</category>
|
||||
|
@@ -66,3 +66,4 @@ VPB=@PBX_VPB@
|
||||
WINARCH=@PBX_WINARCH@
|
||||
ZLIB=@PBX_ZLIB@
|
||||
TIMERFD=@PBX_TIMERFD@
|
||||
NATIVE_ARCH=@AST_NATIVE_ARCH@
|
||||
|
@@ -45,35 +45,6 @@ ifeq ($(shell $(CC) -v 2>&1 | awk '/^gcc version/ { split($$3, v, "."); printf "
|
||||
OPTIMIZE=-O2
|
||||
endif
|
||||
|
||||
# If the compiler's '-march' flag has been specified already, then assume it's a value
|
||||
# that is what the user wants (or has been determined by the configure script). If not,
|
||||
# do some simple logic to set a decent value
|
||||
ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),)
|
||||
ifeq (,$(findstring $(shell uname -s),Darwin SunOS))
|
||||
ifeq (,$(strip $(findstring $(PROC) ,"x86_64 amd64 ultrasparc sparc64 arm armv5b armeb ppc powerpc ppc64 ia64 s390 bfin mipsel mips ")))
|
||||
ifeq (,$(strip $(findstring $(shell uname -m) ,"ppc ppc64 alpha armv4l s390 ")))
|
||||
OPTIMIZE+=-march=$(PROC)
|
||||
endif
|
||||
endif
|
||||
else
|
||||
ifneq (,$(findstring $(OSARCH),Darwin))
|
||||
ifeq ($(shell if test `/usr/bin/sw_vers -productVersion | cut -c4` -gt 5; then echo 6; else echo 0; fi),6)
|
||||
# Snow Leopard/Lion reports i386, even though it's really x86_64
|
||||
OPTIMIZE+=-mtune=native
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#The problem with sparc is the best stuff is in newer versions of gcc (post 3.0) only.
|
||||
#This works for even old (2.96) versions of gcc and provides a small boost either way.
|
||||
#A ultrasparc cpu is really v9 but the stock debian stable 3.0 gcc doesn't support it.
|
||||
#So we go lowest common available by gcc and go a step down, still a step up from
|
||||
#the default as we now have a better instruction set to work with. - Belgarath
|
||||
ifeq ($(PROC),ultrasparc)
|
||||
OPTIMIZE+=-mcpu=v8 -mtune=$(PROC) -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
PG =
|
||||
#PG = -g -pg
|
||||
######### Profiling flags. If you don't know what that means, leave it blank.
|
||||
@@ -224,17 +195,6 @@ GSM_SOURCES = $(SRC)/add.c \
|
||||
$(SRC)/short_term.c \
|
||||
$(SRC)/table.c
|
||||
|
||||
# add k6-specific code only if not on a non-k6 hardware or proc.
|
||||
# XXX Keep a space after each findstring argument
|
||||
# XXX should merge with GSM_OBJECTS
|
||||
ifeq ($(OSARCH),linux-gnu)
|
||||
ifeq (,$(findstring $(shell uname -m) , x86_64 amd64 ppc ppc64 alpha armv4l sparc64 parisc s390 ))
|
||||
ifeq (,$(findstring $(PROC) , arm armv5b armeb powerpc ia64 s390 bfin mipsel mips ))
|
||||
GSM_SOURCES+= $(SRC)/k6opt.s
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
TOAST_SOURCES = $(SRC)/toast.c \
|
||||
$(SRC)/toast_lin.c \
|
||||
$(SRC)/toast_ulaw.c \
|
||||
@@ -279,14 +239,6 @@ GSM_OBJECTS = $(SRC)/add.o \
|
||||
$(SRC)/short_term.o \
|
||||
$(SRC)/table.o
|
||||
|
||||
ifeq ($(OSARCH),linux-gnu)
|
||||
ifeq (,$(findstring $(shell uname -m) , x86_64 amd64 ppc ppc64 alpha armv4l sparc64 parisc ))
|
||||
ifeq (,$(findstring $(PROC) , arm armv5b armeb powerpc ia64 bfin mipsel mips ))
|
||||
GSM_OBJECTS+= $(SRC)/k6opt.o
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
TOAST_OBJECTS = $(SRC)/toast.o \
|
||||
$(SRC)/toast_lin.o \
|
||||
$(SRC)/toast_ulaw.o \
|
||||
|
@@ -1,739 +0,0 @@
|
||||
.file "k6opt.s"
|
||||
.version "01.01"
|
||||
/* gcc2_compiled.: */
|
||||
.section .rodata
|
||||
.align 4
|
||||
.type coefs,@object
|
||||
.size coefs,24
|
||||
coefs:
|
||||
.value -134
|
||||
.value -374
|
||||
.value 0
|
||||
.value 2054
|
||||
.value 5741
|
||||
.value 8192
|
||||
.value 5741
|
||||
.value 2054
|
||||
.value 0
|
||||
.value -374
|
||||
.value -134
|
||||
.value 0
|
||||
.text
|
||||
.align 4
|
||||
/* void Weighting_filter (const short *e, short *x) */
|
||||
.globl Weighting_filter
|
||||
.type Weighting_filter,@function
|
||||
Weighting_filter:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 12(%ebp),%edi
|
||||
movl 8(%ebp),%ebx
|
||||
addl $-10,%ebx
|
||||
emms
|
||||
movl $0x1000,%eax; movd %eax,%mm5 /* for rounding */
|
||||
movq coefs,%mm1
|
||||
movq coefs+8,%mm2
|
||||
movq coefs+16,%mm3
|
||||
xorl %esi,%esi
|
||||
.p2align 2
|
||||
.L21:
|
||||
movq (%ebx,%esi,2),%mm0
|
||||
pmaddwd %mm1,%mm0
|
||||
|
||||
movq 8(%ebx,%esi,2),%mm4
|
||||
pmaddwd %mm2,%mm4
|
||||
paddd %mm4,%mm0
|
||||
|
||||
movq 16(%ebx,%esi,2),%mm4
|
||||
pmaddwd %mm3,%mm4
|
||||
paddd %mm4,%mm0
|
||||
|
||||
movq %mm0,%mm4
|
||||
punpckhdq %mm0,%mm4 /* mm4 has high int32 of mm0 dup'd */
|
||||
paddd %mm4,%mm0;
|
||||
|
||||
paddd %mm5,%mm0 /* add for roundoff */
|
||||
psrad $13,%mm0
|
||||
packssdw %mm0,%mm0
|
||||
movd %mm0,%eax /* ax has result */
|
||||
movw %ax,(%edi,%esi,2)
|
||||
incl %esi
|
||||
cmpl $39,%esi
|
||||
jle .L21
|
||||
emms
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
leave
|
||||
ret
|
||||
.Lfe1:
|
||||
.size Weighting_filter,.Lfe1-Weighting_filter
|
||||
|
||||
.macro ccstep n
|
||||
.if \n
|
||||
movq \n(%edi),%mm1
|
||||
movq \n(%esi),%mm2
|
||||
.else
|
||||
movq (%edi),%mm1
|
||||
movq (%esi),%mm2
|
||||
.endif
|
||||
pmaddwd %mm2,%mm1
|
||||
paddd %mm1,%mm0
|
||||
.endm
|
||||
|
||||
.align 4
|
||||
/* long k6maxcc(const short *wt, const short *dp, short *Nc_out) */
|
||||
.globl k6maxcc
|
||||
.type k6maxcc,@function
|
||||
k6maxcc:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
emms
|
||||
movl 8(%ebp),%edi
|
||||
movl 12(%ebp),%esi
|
||||
movl $0,%edx /* will be maximum inner-product */
|
||||
movl $40,%ebx
|
||||
movl %ebx,%ecx /* will be index of max inner-product */
|
||||
subl $80,%esi
|
||||
.p2align 2
|
||||
.L41:
|
||||
movq (%edi),%mm0
|
||||
movq (%esi),%mm2
|
||||
pmaddwd %mm2,%mm0
|
||||
ccstep 8
|
||||
ccstep 16
|
||||
ccstep 24
|
||||
ccstep 32
|
||||
ccstep 40
|
||||
ccstep 48
|
||||
ccstep 56
|
||||
ccstep 64
|
||||
ccstep 72
|
||||
|
||||
movq %mm0,%mm1
|
||||
punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */
|
||||
paddd %mm1,%mm0;
|
||||
movd %mm0,%eax /* eax has result */
|
||||
|
||||
cmpl %edx,%eax
|
||||
jle .L40
|
||||
movl %eax,%edx
|
||||
movl %ebx,%ecx
|
||||
.p2align 2
|
||||
.L40:
|
||||
subl $2,%esi
|
||||
incl %ebx
|
||||
cmpl $120,%ebx
|
||||
jle .L41
|
||||
movl 16(%ebp),%eax
|
||||
movw %cx,(%eax)
|
||||
movl %edx,%eax
|
||||
emms
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
leave
|
||||
ret
|
||||
.Lfe2:
|
||||
.size k6maxcc,.Lfe2-k6maxcc
|
||||
|
||||
|
||||
.align 4
|
||||
/* long k6iprod (const short *p, const short *q, int n) */
|
||||
.globl k6iprod
|
||||
.type k6iprod,@function
|
||||
k6iprod:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
emms
|
||||
pxor %mm0,%mm0
|
||||
movl 8(%ebp),%esi
|
||||
movl 12(%ebp),%edi
|
||||
movl 16(%ebp),%eax
|
||||
leal -32(%esi,%eax,2),%edx /* edx = top - 32 */
|
||||
|
||||
cmpl %edx,%esi; ja .L202
|
||||
|
||||
.p2align 2
|
||||
.L201:
|
||||
ccstep 0
|
||||
ccstep 8
|
||||
ccstep 16
|
||||
ccstep 24
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
cmpl %edx,%esi; jbe .L201
|
||||
|
||||
.p2align 2
|
||||
.L202:
|
||||
addl $24,%edx /* now edx = top-8 */
|
||||
cmpl %edx,%esi; ja .L205
|
||||
|
||||
.p2align 2
|
||||
.L203:
|
||||
ccstep 0
|
||||
|
||||
addl $8,%esi
|
||||
addl $8,%edi
|
||||
cmpl %edx,%esi; jbe .L203
|
||||
|
||||
.p2align 2
|
||||
.L205:
|
||||
addl $4,%edx /* now edx = top-4 */
|
||||
cmpl %edx,%esi; ja .L207
|
||||
|
||||
movd (%edi),%mm1
|
||||
movd (%esi),%mm2
|
||||
pmaddwd %mm2,%mm1
|
||||
paddd %mm1,%mm0
|
||||
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
|
||||
.p2align 2
|
||||
.L207:
|
||||
addl $2,%edx /* now edx = top-2 */
|
||||
cmpl %edx,%esi; ja .L209
|
||||
|
||||
movswl (%edi),%eax
|
||||
movd %eax,%mm1
|
||||
movswl (%esi),%eax
|
||||
movd %eax,%mm2
|
||||
pmaddwd %mm2,%mm1
|
||||
paddd %mm1,%mm0
|
||||
|
||||
.p2align 2
|
||||
.L209:
|
||||
movq %mm0,%mm1
|
||||
punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */
|
||||
paddd %mm1,%mm0;
|
||||
movd %mm0,%eax /* eax has result */
|
||||
|
||||
emms
|
||||
popl %esi
|
||||
popl %edi
|
||||
leave
|
||||
ret
|
||||
.Lfe3:
|
||||
.size k6iprod,.Lfe3-k6iprod
|
||||
|
||||
|
||||
.align 4
|
||||
/* void k6vsraw (short *p, int n, int bits) */
|
||||
.globl k6vsraw
|
||||
.type k6vsraw,@function
|
||||
k6vsraw:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
movl 8(%ebp),%esi
|
||||
movl 16(%ebp),%ecx
|
||||
andl %ecx,%ecx; jle .L399
|
||||
movl 12(%ebp),%eax
|
||||
leal -16(%esi,%eax,2),%edx /* edx = top - 16 */
|
||||
emms
|
||||
movd %ecx,%mm3
|
||||
movq ones,%mm2
|
||||
psllw %mm3,%mm2; psrlw $1,%mm2
|
||||
cmpl %edx,%esi; ja .L306
|
||||
|
||||
.p2align 2
|
||||
.L302: /* 8 words per iteration */
|
||||
movq (%esi),%mm0
|
||||
movq 8(%esi),%mm1
|
||||
paddsw %mm2,%mm0
|
||||
psraw %mm3,%mm0;
|
||||
paddsw %mm2,%mm1
|
||||
psraw %mm3,%mm1;
|
||||
movq %mm0,(%esi)
|
||||
movq %mm1,8(%esi)
|
||||
addl $16,%esi
|
||||
cmpl %edx,%esi
|
||||
jbe .L302
|
||||
|
||||
.p2align 2
|
||||
.L306:
|
||||
addl $12,%edx /* now edx = top-4 */
|
||||
cmpl %edx,%esi; ja .L310
|
||||
|
||||
.p2align 2
|
||||
.L308: /* do up to 6 words, two at a time */
|
||||
movd (%esi),%mm0
|
||||
paddsw %mm2,%mm0
|
||||
psraw %mm3,%mm0;
|
||||
movd %mm0,(%esi)
|
||||
addl $4,%esi
|
||||
cmpl %edx,%esi
|
||||
jbe .L308
|
||||
|
||||
.p2align 2
|
||||
.L310:
|
||||
addl $2,%edx /* now edx = top-2 */
|
||||
cmpl %edx,%esi; ja .L315
|
||||
|
||||
movzwl (%esi),%eax
|
||||
movd %eax,%mm0
|
||||
paddsw %mm2,%mm0
|
||||
psraw %mm3,%mm0;
|
||||
movd %mm0,%eax
|
||||
movw %ax,(%esi)
|
||||
|
||||
.p2align 2
|
||||
.L315:
|
||||
emms
|
||||
.L399:
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
.Lfe4:
|
||||
.size k6vsraw,.Lfe4-k6vsraw
|
||||
|
||||
.align 4
|
||||
/* void k6vsllw (short *p, int n, int bits) */
|
||||
.globl k6vsllw
|
||||
.type k6vsllw,@function
|
||||
k6vsllw:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
movl 8(%ebp),%esi
|
||||
movl 16(%ebp),%ecx
|
||||
andl %ecx,%ecx; jle .L499
|
||||
movl 12(%ebp),%eax
|
||||
leal -16(%esi,%eax,2),%edx /* edx = top - 16 */
|
||||
emms
|
||||
movd %ecx,%mm3
|
||||
cmpl %edx,%esi; ja .L406
|
||||
|
||||
.p2align 2
|
||||
.L402: /* 8 words per iteration */
|
||||
movq (%esi),%mm0
|
||||
movq 8(%esi),%mm1
|
||||
psllw %mm3,%mm0;
|
||||
psllw %mm3,%mm1;
|
||||
movq %mm0,(%esi)
|
||||
movq %mm1,8(%esi)
|
||||
addl $16,%esi
|
||||
cmpl %edx,%esi
|
||||
jbe .L402
|
||||
|
||||
.p2align 2
|
||||
.L406:
|
||||
addl $12,%edx /* now edx = top-4 */
|
||||
cmpl %edx,%esi; ja .L410
|
||||
|
||||
.p2align 2
|
||||
.L408: /* do up to 6 words, two at a time */
|
||||
movd (%esi),%mm0
|
||||
psllw %mm3,%mm0;
|
||||
movd %mm0,(%esi)
|
||||
addl $4,%esi
|
||||
cmpl %edx,%esi
|
||||
jbe .L408
|
||||
|
||||
.p2align 2
|
||||
.L410:
|
||||
addl $2,%edx /* now edx = top-2 */
|
||||
cmpl %edx,%esi; ja .L415
|
||||
|
||||
movzwl (%esi),%eax
|
||||
movd %eax,%mm0
|
||||
psllw %mm3,%mm0;
|
||||
movd %mm0,%eax
|
||||
movw %ax,(%esi)
|
||||
|
||||
.p2align 2
|
||||
.L415:
|
||||
emms
|
||||
.L499:
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
.Lfe5:
|
||||
.size k6vsllw,.Lfe5-k6vsllw
|
||||
|
||||
|
||||
.section .rodata
|
||||
.align 4
|
||||
.type extremes,@object
|
||||
.size extremes,8
|
||||
extremes:
|
||||
.long 0x80008000
|
||||
.long 0x7fff7fff
|
||||
.type ones,@object
|
||||
.size ones,8
|
||||
ones:
|
||||
.long 0x00010001
|
||||
.long 0x00010001
|
||||
|
||||
.text
|
||||
.align 4
|
||||
/* long k6maxmin (const short *p, int n, short *out) */
|
||||
.globl k6maxmin
|
||||
.type k6maxmin,@function
|
||||
k6maxmin:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
emms
|
||||
movl 8(%ebp),%esi
|
||||
movl 12(%ebp),%eax
|
||||
leal -8(%esi,%eax,2),%edx
|
||||
|
||||
cmpl %edx,%esi
|
||||
jbe .L52
|
||||
movd extremes,%mm0
|
||||
movd extremes+4,%mm1
|
||||
jmp .L58
|
||||
|
||||
.p2align 2
|
||||
.L52:
|
||||
movq (%esi),%mm0 /* mm0 will be max's */
|
||||
movq %mm0,%mm1 /* mm1 will be min's */
|
||||
addl $8,%esi
|
||||
cmpl %edx,%esi
|
||||
ja .L56
|
||||
|
||||
.p2align 2
|
||||
.L54:
|
||||
movq (%esi),%mm2
|
||||
|
||||
movq %mm2,%mm3
|
||||
pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
|
||||
movq %mm3,%mm4
|
||||
pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */
|
||||
pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */
|
||||
por %mm3,%mm4
|
||||
movq %mm4,%mm0 /* now mm0 is updated max's */
|
||||
|
||||
movq %mm1,%mm3
|
||||
pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
|
||||
pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
|
||||
pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
|
||||
por %mm3,%mm2
|
||||
movq %mm2,%mm1 /* now mm1 is updated min's */
|
||||
|
||||
addl $8,%esi
|
||||
cmpl %edx,%esi
|
||||
jbe .L54
|
||||
|
||||
.p2align 2
|
||||
.L56: /* merge down the 4-word max/mins to lower 2 words */
|
||||
|
||||
movq %mm0,%mm2
|
||||
psrlq $32,%mm2
|
||||
movq %mm2,%mm3
|
||||
pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
|
||||
pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */
|
||||
pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */
|
||||
por %mm3,%mm2
|
||||
movq %mm2,%mm0 /* now mm0 is updated max's */
|
||||
|
||||
movq %mm1,%mm2
|
||||
psrlq $32,%mm2
|
||||
movq %mm1,%mm3
|
||||
pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
|
||||
pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
|
||||
pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
|
||||
por %mm3,%mm2
|
||||
movq %mm2,%mm1 /* now mm1 is updated min's */
|
||||
|
||||
.p2align 2
|
||||
.L58:
|
||||
addl $4,%edx /* now dx = top-4 */
|
||||
cmpl %edx,%esi
|
||||
ja .L62
|
||||
/* here, there are >= 2 words of input remaining */
|
||||
movd (%esi),%mm2
|
||||
|
||||
movq %mm2,%mm3
|
||||
pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
|
||||
movq %mm3,%mm4
|
||||
pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */
|
||||
pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */
|
||||
por %mm3,%mm4
|
||||
movq %mm4,%mm0 /* now mm0 is updated max's */
|
||||
|
||||
movq %mm1,%mm3
|
||||
pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
|
||||
pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
|
||||
pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
|
||||
por %mm3,%mm2
|
||||
movq %mm2,%mm1 /* now mm1 is updated min's */
|
||||
|
||||
addl $4,%esi
|
||||
|
||||
.p2align 2
|
||||
.L62:
|
||||
/* merge down the 2-word max/mins to 1 word */
|
||||
|
||||
movq %mm0,%mm2
|
||||
psrlq $16,%mm2
|
||||
movq %mm2,%mm3
|
||||
pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */
|
||||
pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */
|
||||
pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */
|
||||
por %mm3,%mm2
|
||||
movd %mm2,%ecx /* cx is max so far */
|
||||
|
||||
movq %mm1,%mm2
|
||||
psrlq $16,%mm2
|
||||
movq %mm1,%mm3
|
||||
pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */
|
||||
pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */
|
||||
pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */
|
||||
por %mm3,%mm2
|
||||
movd %mm2,%eax /* ax is min so far */
|
||||
|
||||
addl $2,%edx /* now dx = top-2 */
|
||||
cmpl %edx,%esi
|
||||
ja .L65
|
||||
|
||||
/* here, there is one word of input left */
|
||||
cmpw (%esi),%cx
|
||||
jge .L64
|
||||
movw (%esi),%cx
|
||||
.p2align 2
|
||||
.L64:
|
||||
cmpw (%esi),%ax
|
||||
jle .L65
|
||||
movw (%esi),%ax
|
||||
|
||||
.p2align 2
|
||||
.L65: /* (finally!) cx is the max, ax the min */
|
||||
movswl %cx,%ecx
|
||||
movswl %ax,%eax
|
||||
|
||||
movl 16(%ebp),%edx /* ptr to output max,min vals */
|
||||
andl %edx,%edx; jz .L77
|
||||
movw %cx,(%edx) /* max */
|
||||
movw %ax,2(%edx) /* min */
|
||||
.p2align 2
|
||||
.L77:
|
||||
/* now calculate max absolute val */
|
||||
negl %eax
|
||||
cmpl %ecx,%eax
|
||||
jge .L81
|
||||
movl %ecx,%eax
|
||||
.p2align 2
|
||||
.L81:
|
||||
emms
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
.Lfe6:
|
||||
.size k6maxmin,.Lfe6-k6maxmin
|
||||
|
||||
/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s) */
|
||||
.equiv pm_u0,8
|
||||
.equiv pm_rp0,12
|
||||
.equiv pm_kn,16
|
||||
.equiv pm_s,20
|
||||
.equiv lv_u_top,-4
|
||||
.equiv lv_s_top,-8
|
||||
.equiv lv_rp,-40 /* local version of rp0 with each word twice */
|
||||
.align 4
|
||||
.globl Short_term_analysis_filteringx
|
||||
.type Short_term_analysis_filteringx,@function
|
||||
Short_term_analysis_filteringx:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $40,%esp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
|
||||
movl pm_rp0(%ebp),%esi;
|
||||
leal lv_rp(%ebp),%edi;
|
||||
cld
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
emms
|
||||
movl $0x4000,%eax;
|
||||
movd %eax,%mm4;
|
||||
punpckldq %mm4,%mm4 /* (0x00004000,0x00004000) for rounding dword product pairs */
|
||||
|
||||
movl pm_u0(%ebp),%eax
|
||||
addl $16,%eax
|
||||
movl %eax,lv_u_top(%ebp) /* UTOP */
|
||||
movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */
|
||||
movl pm_kn(%ebp),%eax
|
||||
leal (%edx,%eax,2),%eax
|
||||
movl %eax,lv_s_top(%ebp)
|
||||
cmpl %eax,%edx
|
||||
jae .L179
|
||||
.p2align 2
|
||||
.L181:
|
||||
leal lv_rp(%ebp),%esi /* RP */
|
||||
movl pm_u0(%ebp),%edi /* U */
|
||||
movw (%edx),%ax /* (0,DI) */
|
||||
roll $16,%eax
|
||||
movw (%edx),%ax /* (DI,DI) */
|
||||
.p2align 2
|
||||
.L185: /* RP is %esi */
|
||||
movl %eax,%ecx
|
||||
movw (%edi),%ax /* (DI,U) */
|
||||
movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */
|
||||
movw %cx,(%edi)
|
||||
|
||||
movd %eax,%mm2 /* mm2 is (0,0,DI,U) */
|
||||
rorl $16,%eax
|
||||
movd %eax,%mm1 /* mm1 is (0,0,U,DI) */
|
||||
|
||||
movq %mm1,%mm0
|
||||
pmullw %mm3,%mm0
|
||||
pmulhw %mm3,%mm1
|
||||
punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */
|
||||
paddd %mm4,%mm0 /* mm4 is 0x00004000,0x00004000 */
|
||||
psrad $15,%mm0 /* (RP*U,RP*DI) adjusted */
|
||||
packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */
|
||||
paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */
|
||||
movd %mm0,%eax /* (DI,U') */
|
||||
|
||||
addl $2,%edi
|
||||
addl $4,%esi
|
||||
cmpl lv_u_top(%ebp),%edi
|
||||
jb .L185
|
||||
|
||||
rorl $16,%eax
|
||||
movw %ax,(%edx) /* last DI goes to *s */
|
||||
addl $2,%edx /* next s */
|
||||
cmpl lv_s_top(%ebp),%edx
|
||||
jb .L181
|
||||
.p2align 2
|
||||
.L179:
|
||||
emms
|
||||
popl %esi
|
||||
popl %edi
|
||||
leave
|
||||
ret
|
||||
.Lfe7:
|
||||
.size Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx
|
||||
|
||||
.end
|
||||
|
||||
/* 'as' macros seem to be case-insensitive */
|
||||
.macro STEP n
|
||||
.if \n
|
||||
movd \n(%esi),%mm3 /* mm3 is (0,0,RP,RP) */
|
||||
.else
|
||||
movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */
|
||||
.endif
|
||||
movq %mm5,%mm1;
|
||||
movd %mm4,%ecx; movw %cx,%ax /* (DI,U) */
|
||||
psllq $48,%mm1; psrlq $16,%mm4; por %mm1,%mm4
|
||||
psllq $48,%mm0; psrlq $16,%mm5; por %mm0,%mm5
|
||||
|
||||
movd %eax,%mm2 /* mm2 is (0,0,DI,U) */
|
||||
rorl $16,%eax
|
||||
movd %eax,%mm1 /* mm1 is (0,0,U,DI) */
|
||||
|
||||
movq %mm1,%mm0
|
||||
pmullw %mm3,%mm0
|
||||
pmulhw %mm3,%mm1
|
||||
punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */
|
||||
paddd %mm6,%mm0 /* mm6 is 0x00004000,0x00004000 */
|
||||
psrad $15,%mm0 /* (RP*U,RP*DI) adjusted */
|
||||
packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */
|
||||
paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */
|
||||
movd %mm0,%eax /* (DI,U') */
|
||||
.endm
|
||||
|
||||
/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s) */
|
||||
.equiv pm_u0,8
|
||||
.equiv pm_rp0,12
|
||||
.equiv pm_kn,16
|
||||
.equiv pm_s,20
|
||||
.equiv lv_rp_top,-4
|
||||
.equiv lv_s_top,-8
|
||||
.equiv lv_rp,-40 /* local version of rp0 with each word twice */
|
||||
.align 4
|
||||
.globl Short_term_analysis_filteringx
|
||||
.type Short_term_analysis_filteringx,@function
|
||||
Short_term_analysis_filteringx:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $56,%esp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
|
||||
movl pm_rp0(%ebp),%esi;
|
||||
leal lv_rp(%ebp),%edi;
|
||||
cld
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
lodsw; stosw; stosw
|
||||
movl %edi,lv_rp_top(%ebp)
|
||||
emms
|
||||
|
||||
movl $0x4000,%eax;
|
||||
movd %eax,%mm6;
|
||||
punpckldq %mm6,%mm6 /* (0x00004000,0x00004000) for rounding dword product pairs */
|
||||
|
||||
movl pm_u0(%ebp),%ebx
|
||||
movq (%ebx),%mm4; movq 8(%ebx),%mm5 /* the 8 u's */
|
||||
movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */
|
||||
movl pm_kn(%ebp),%eax
|
||||
leal (%edx,%eax,2),%eax
|
||||
movl %eax,lv_s_top(%ebp)
|
||||
cmpl %eax,%edx
|
||||
jae .L179
|
||||
.p2align 2
|
||||
.L181:
|
||||
leal lv_rp(%ebp),%esi /* RP */
|
||||
movw (%edx),%ax /* (0,DI) */
|
||||
roll $16,%eax
|
||||
movw (%edx),%ax /* (DI,DI) */
|
||||
movd %eax,%mm0
|
||||
.p2align 2
|
||||
.L185: /* RP is %esi */
|
||||
step 0
|
||||
step 4
|
||||
step 8
|
||||
step 12
|
||||
/*
|
||||
step 16
|
||||
step 20
|
||||
step 24
|
||||
step 28
|
||||
*/
|
||||
addl $16,%esi
|
||||
cmpl lv_rp_top(%ebp),%esi
|
||||
jb .L185
|
||||
|
||||
rorl $16,%eax
|
||||
movw %ax,(%edx) /* last DI goes to *s */
|
||||
addl $2,%edx /* next s */
|
||||
cmpl lv_s_top(%ebp),%edx
|
||||
jb .L181
|
||||
.L179:
|
||||
movq %mm4,(%ebx); movq %mm5,8(%ebx) /* the 8 u's */
|
||||
emms
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
leave
|
||||
ret
|
||||
.Lfe7:
|
||||
.size Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx
|
||||
.ident "GCC: (GNU) 2.95.2 19991109 (Debian GNU/Linux)"
|
||||
|
@@ -27,37 +27,6 @@ CFLAGS+= -fPIC -Wno-comment
|
||||
# fails miserably. Remove it for the time being.
|
||||
_ASTCFLAGS:=$(_ASTCFLAGS:-Werror=)
|
||||
|
||||
# If the compiler's '-march' flag has been specified already, then assume it's a value
|
||||
# that is what the user wants (or has been determined by the configure script). If not,
|
||||
# do some simple logic to set a decent value
|
||||
ifeq ($(findstring -march,$(_ASTCFLAGS) $(ASTCFLAGS)),)
|
||||
#fix for PPC processors and ALPHA, And UltraSparc too
|
||||
ifneq ($(OSARCH),Darwin)
|
||||
ifneq ($(findstring BSD,${OSARCH}),BSD)
|
||||
ifneq ($(PROC),ppc)
|
||||
ifneq ($(PROC),x86_64)
|
||||
ifneq ($(PROC),alpha)
|
||||
#The problem with sparc is the best stuff is in newer versions of gcc (post 3.0) only.
|
||||
#This works for even old (2.96) versions of gcc and provides a small boost either way.
|
||||
#A ultrasparc cpu is really v9 but the stock debian stable 3.0 gcc doesn't support it.
|
||||
#So we go lowest common available by gcc and go a step down, still a step up from
|
||||
#the default as we now have a better instruction set to work with. - Belgarath
|
||||
ifeq ($(PROC),ultrasparc)
|
||||
CFLAGS+= -mtune=$(PROC) -mcpu=v8 -O3 -fomit-frame-pointer
|
||||
else
|
||||
ifneq ($(OSARCH),SunOS)
|
||||
ifneq ($(OSARCH),arm)
|
||||
# CFLAGS+= -march=$(PROC)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
LIB = $(LIB_TARGET_DIR)/liblpc10.a
|
||||
|
||||
.PHONY: all clean
|
||||
|
15
configure.ac
15
configure.ac
@@ -1009,15 +1009,20 @@ else
|
||||
fi
|
||||
AC_SUBST(AST_SHADOW_WARNINGS)
|
||||
|
||||
AC_MSG_CHECKING(for -march=native)
|
||||
AC_MSG_CHECKING(for -march=native support)
|
||||
if $(${CC} -march=native -S -o /dev/null -xc /dev/null > /dev/null 2>&1); then
|
||||
AC_MSG_RESULT(yes)
|
||||
AST_MARCH_NATIVE="-march=native"
|
||||
if test "${CONFIG_CFLAGS}" = ""; then
|
||||
AC_MSG_RESULT(yes)
|
||||
AST_NATIVE_ARCH=1
|
||||
else
|
||||
AC_MSG_RESULT(user CFLAGS present)
|
||||
AST_NATIVE_ARCH=
|
||||
fi
|
||||
else
|
||||
AC_MSG_RESULT(no)
|
||||
AST_MARCH_NATIVE=
|
||||
AST_NATIVE_ARCH=
|
||||
fi
|
||||
AC_SUBST(AST_MARCH_NATIVE)
|
||||
AC_SUBST(AST_NATIVE_ARCH)
|
||||
|
||||
AC_MSG_CHECKING(for sysinfo)
|
||||
AC_LINK_IFELSE(
|
||||
|
@@ -46,7 +46,6 @@ HOST_CPU=@HOST_CPU@
|
||||
HOST_VENDOR=@HOST_VENDOR@
|
||||
HOST_OS=@HOST_OS@
|
||||
|
||||
PROC=@HOST_CPU@
|
||||
OSARCH=@OSARCH@
|
||||
OSREV=@PBX_OSREV@
|
||||
|
||||
|
Reference in New Issue
Block a user