mirror of
				https://github.com/asterisk/asterisk.git
				synced 2025-10-31 02:37:10 +00:00 
			
		
		
		
	Version 0.1.8 from FTP
git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@302 65c4cc65-6c06-0410-ace0-fbb531ad65f3
This commit is contained in:
		
							
								
								
									
										84
									
								
								codecs/gsm/src/k6opt.h
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										84
									
								
								codecs/gsm/src/k6opt.h
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| /* k6opt.h  vector functions optimized for MMX extensions to x86 | ||||
|  * | ||||
|  * Copyright (C) 1999 by Stanley J. Brooks <stabro@megsinet.net> | ||||
|  *  | ||||
|  * Any use of this software is permitted provided that this notice is not | ||||
|  * removed and that neither the authors nor the Technische Universitaet Berlin | ||||
|  * are deemed to have made any representations as to the suitability of this | ||||
|  * software for any purpose nor are held responsible for any defects of | ||||
|  * this software.  THERE IS ABSOLUTELY NO WARRANTY FOR THIS SOFTWARE; | ||||
|  * not even the implied warranty of MERCHANTABILITY or FITNESS FOR | ||||
|  * A PARTICULAR PURPOSE. | ||||
|  *  | ||||
|  * Chicago, 03.12.1999 | ||||
|  * Stanley J. Brooks | ||||
|  */ | ||||
| /* | ||||
|  * Weighting_filter(e,x) | ||||
|  *  MMX implementation in k6opt.s: for k = 0..39, x[k] is the weighted | ||||
|  *  sum of the samples around e[k] (taps in the coefs table), rounded, | ||||
|  *  shifted right by 13 and saturated to a word. | ||||
|  */ | ||||
| extern void Weighting_filter P2((e, x), | ||||
| 	const word	* e,	/* signal, indexed e[-5..44]	IN  */ | ||||
| 	word	* x		/* signal [0..39]	OUT */ | ||||
| ) | ||||
| ; | ||||
| /* | ||||
|  * k6maxcc(wt,dp,Nc_out) | ||||
|  *  for each lag 40..120 forms the 40-term inner product of wt[0..39] | ||||
|  *  with dp[-lag..-lag+39]; returns the largest product (0 if none is | ||||
|  *  positive) and stores the lag that produced it (40 if none) in *Nc_out. | ||||
|  */ | ||||
| extern longword k6maxcc P3((wt,dp,Nc_out), | ||||
| 	const word *wt,	/* [0..39]	IN	*/ | ||||
| 	const word *dp, 	/* [-120..-1]	IN	*/ | ||||
| 	word		* Nc_out	/* 		OUT	*/ | ||||
| ) | ||||
| ; | ||||
| /* | ||||
|  * k6maxmin(p,n,out[]) | ||||
|  *  input p[n] is array of shorts (require n>0) | ||||
|  *  returns (long) maximum absolute value, i.e. the larger of max and -min. | ||||
|  *  if out!=NULL, also returns out[0] the maximum and out[1] the minimum | ||||
|  */ | ||||
| extern longword k6maxmin P3((p,n,out), | ||||
| 	const word *p, | ||||
| 	int n,  | ||||
| 	word *out	/* 		out[0] is max, out[1] is min; may be NULL */ | ||||
| ) | ||||
| ; | ||||
| /* | ||||
|  * k6iprod(p,q,n) | ||||
|  *  inner product of two arrays of shorts, p[0]*q[0] + ... + p[n-1]*q[n-1], | ||||
|  *  accumulated in 32 bits (MMX implementation in k6opt.s). | ||||
|  */ | ||||
| extern longword k6iprod P3((p,q,n), | ||||
| 	const word *p, | ||||
| 	const word *q, | ||||
| 	int n | ||||
| ) | ||||
| ; | ||||
|  | ||||
| /* | ||||
|  * k6vsraw(p,n,bits) | ||||
|  *  input p[n] is array of shorts (require n>0) | ||||
|  *  shift/round each to the right by bits>=0 bits. | ||||
|  *  p[] is rewritten in place, so it is not declared const. | ||||
|  *  Rounding adds 1<<(bits-1) with signed saturation before the | ||||
|  *  arithmetic shift; bits<=0 leaves the array untouched. | ||||
|  */ | ||||
| extern void k6vsraw P3((p,n,bits), | ||||
| 	word *p,	/* [0..n-1]	IN/OUT	*/ | ||||
| 	int n,  | ||||
| 	int bits | ||||
| ) | ||||
| ; | ||||
|  | ||||
| /* | ||||
|  * k6vsllw(p,n,bits) | ||||
|  *  input p[n] is array of shorts (require n>0) | ||||
|  *  shift each to the left by bits>=0 bits. | ||||
|  *  p[] is rewritten in place, so it is not declared const. | ||||
|  *  The shift does not saturate; bits<=0 leaves the array untouched. | ||||
|  */ | ||||
| extern void k6vsllw P3((p,n,bits), | ||||
| 	word *p,	/* [0..n-1]	IN/OUT	*/ | ||||
| 	int n,  | ||||
| 	int bits | ||||
| ) | ||||
| ; | ||||
|  | ||||
| #if 1  /* there isn't any significant speed gain from mmx here: */ | ||||
| extern void Short_term_analysis_filteringx P4((u0,rp0,k_n,s), | ||||
| 	register word * u0,	/* [0..7]	IN/OUT	*/ | ||||
| 	register word	* rp0,	/* [0..7]	IN	*/ | ||||
| 	register int 	k_n, 	/*   k_end - k_start, number of samples in s	*/ | ||||
| 	register word	* s	/* [0..k_n-1]	IN/OUT	*/ | ||||
| ) | ||||
| ; | ||||
| /* | ||||
|  * Left disabled: enabling this define would route calls to | ||||
|  * Short_term_analysis_filtering to the MMX routine declared above. | ||||
| #define Short_term_analysis_filtering Short_term_analysis_filteringx | ||||
| */ | ||||
| #endif | ||||
							
								
								
									
										755
									
								
								codecs/gsm/src/k6opt.s
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										755
									
								
								codecs/gsm/src/k6opt.s
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,755 @@ | ||||
| /* k6opt.s  vector functions optimized for MMX extensions to x86 | ||||
|  * | ||||
|  * Copyright (C) 1999 by Stanley J. Brooks <stabro@megsinet.net> | ||||
|  *  | ||||
|  * Any use of this software is permitted provided that this notice is not | ||||
|  * removed and that neither the authors nor the Technische Universitaet Berlin | ||||
|  * are deemed to have made any representations as to the suitability of this | ||||
|  * software for any purpose nor are held responsible for any defects of | ||||
|  * this software.  THERE IS ABSOLUTELY NO WARRANTY FOR THIS SOFTWARE; | ||||
|  * not even the implied warranty of MERCHANTABILITY or FITNESS FOR | ||||
|  * A PARTICULAR PURPOSE. | ||||
|  *  | ||||
|  * Chicago, 03.12.1999 | ||||
|  * Stanley J. Brooks | ||||
|  */ | ||||
|  | ||||
| 	.file	"k6opt.s" | ||||
| 	.version	"01.01" | ||||
| /* gcc2_compiled.: | ||||
|  * | ||||
|  * coefs: the 11 filter taps read by Weighting_filter, followed by one | ||||
|  * zero word so that the table fills exactly three 4-word (64-bit) MMX | ||||
|  * operands (12 words = 24 bytes). | ||||
|  */ | ||||
| .section	.rodata | ||||
| 	.align 4 | ||||
| 	.type	 coefs,@object | ||||
| 	.size	 coefs,24 | ||||
| coefs: | ||||
| 	.value -134 /* tap 0, applied to e[k-5] */ | ||||
| 	.value -374 | ||||
| 	.value 0 | ||||
| 	.value 2054 | ||||
| 	.value 5741 | ||||
| 	.value 8192 /* tap 5, centre tap, applied to e[k] */ | ||||
| 	.value 5741 | ||||
| 	.value 2054 | ||||
| 	.value 0 | ||||
| 	.value -374 | ||||
| 	.value -134 /* tap 10, applied to e[k+5] */ | ||||
| 	.value 0 /* pad word: cancels the 12th sample each movq triple loads */ | ||||
| .text | ||||
| 	.align 4 | ||||
| /* void Weighting_filter (const short *e, short *x) | ||||
|  * | ||||
|  * For k = 0..39 stores in x[k] the signed-saturated 16-bit value of | ||||
|  *   (sum(coefs[i] * e[k-5+i], i = 0..11) + 0x1000) >> 13 | ||||
|  * | ||||
|  * Stack arguments: e at 8(%ebp), x at 12(%ebp).  %edi, %esi and %ebx | ||||
|  * are saved and restored; %eax and %mm0-%mm5 are overwritten. | ||||
|  * Register roles: %ebx = &e[-5], %edi = x, %esi = k, | ||||
|  *                 %mm1-%mm3 = the 12 coefs words, %mm5 = rounding term. | ||||
|  * | ||||
|  * NOTE(review): every iteration loads the 12 words e[k-5..k+6], so the | ||||
|  * k = 39 pass reads e[45].  That word is multiplied by the zero pad in | ||||
|  * coefs, but it lies one word past the [-5..44] range k6opt.h documents | ||||
|  * for e -- confirm the caller's buffer makes this read harmless. | ||||
|  */ | ||||
| .globl Weighting_filter | ||||
| 	.type	 Weighting_filter,@function | ||||
| Weighting_filter: | ||||
| 	pushl %ebp | ||||
| 	movl %esp,%ebp | ||||
| 	pushl %edi | ||||
| 	pushl %esi | ||||
| 	pushl %ebx | ||||
| 	movl 12(%ebp),%edi /* edi = x */ | ||||
| 	movl 8(%ebp),%ebx /* ebx = e */ | ||||
| 	addl $-10,%ebx /* step back 10 bytes: ebx = &e[-5] */ | ||||
| 	emms | ||||
| 	movl $0x1000,%eax; movd %eax,%mm5  /* for rounding */ | ||||
| 	movq coefs,%mm1 /* taps 0..3 */ | ||||
| 	movq coefs+8,%mm2 /* taps 4..7 */ | ||||
| 	movq coefs+16,%mm3 /* taps 8..10 and the zero pad */ | ||||
| 	xorl %esi,%esi /* k = 0 */ | ||||
| 	.p2align 2 | ||||
| .L21: | ||||
| 	movq (%ebx,%esi,2),%mm0 /* e[k-5..k-2] */ | ||||
| 	pmaddwd %mm1,%mm0 /* two 32-bit partial sums */ | ||||
|  | ||||
| 	movq 8(%ebx,%esi,2),%mm4 /* e[k-1..k+2] */ | ||||
| 	pmaddwd %mm2,%mm4 | ||||
| 	paddd %mm4,%mm0 | ||||
|  | ||||
| 	movq 16(%ebx,%esi,2),%mm4 /* e[k+3..k+6] */ | ||||
| 	pmaddwd %mm3,%mm4 | ||||
| 	paddd %mm4,%mm0 | ||||
|  | ||||
| 	movq %mm0,%mm4 | ||||
| 	punpckhdq %mm0,%mm4  /* mm4 has high int32 of mm0 dup'd */ | ||||
| 	paddd %mm4,%mm0; /* low dword of mm0 = full 12-term sum */ | ||||
|  | ||||
| 	paddd %mm5,%mm0 /* add for roundoff */ | ||||
| 	psrad $13,%mm0 | ||||
| 	packssdw %mm0,%mm0	 /* saturate the 32-bit result to a signed word */ | ||||
| 	movd %mm0,%eax  /* ax has result */ | ||||
| 	movw %ax,(%edi,%esi,2) /* x[k] = result */ | ||||
| 	incl %esi | ||||
| 	cmpl $39,%esi | ||||
| 	jle .L21 /* loop while k <= 39 */ | ||||
| 	emms | ||||
| 	popl %ebx | ||||
| 	popl %esi | ||||
| 	popl %edi | ||||
| 	leave | ||||
| 	ret | ||||
| .Lfe1: | ||||
| 	.size	 Weighting_filter,.Lfe1-Weighting_filter | ||||
| /* ccstep n: one 4-word multiply-accumulate step. | ||||
|  * Loads 4 words at byte offset n from %edi and from %esi, multiplies | ||||
|  * them pairwise with pmaddwd and adds the two 32-bit partial sums into | ||||
|  * %mm0.  Overwrites %mm1 and %mm2; n = 0 selects the form without a | ||||
|  * displacement. | ||||
|  */ | ||||
| .macro ccstep n | ||||
| .if \n | ||||
| 	movq \n(%edi),%mm1 | ||||
| 	movq \n(%esi),%mm2 | ||||
| .else | ||||
| 	movq (%edi),%mm1 | ||||
| 	movq (%esi),%mm2 | ||||
| .endif | ||||
| 	pmaddwd %mm2,%mm1 /* mm1 = two dword sums of word products */ | ||||
| 	paddd %mm1,%mm0 /* accumulate into the running pair in mm0 */ | ||||
| .endm | ||||
|  | ||||
| 	.align 4 | ||||
| /* long k6maxcc(const short *wt, const short *dp, short *Nc_out) | ||||
|  * | ||||
|  * For each lag 40..120 computes the 40-term inner product of wt[0..39] | ||||
|  * with dp[-lag..-lag+39] (32-bit accumulation).  Returns the largest | ||||
|  * product and stores the lag that gave it in *Nc_out.  The running | ||||
|  * maximum starts at 0 with lag 40 and is replaced only by a strictly | ||||
|  * greater product. | ||||
|  * | ||||
|  * Stack arguments: wt at 8(%ebp), dp at 12(%ebp), Nc_out at 16(%ebp). | ||||
|  * Register roles: %edi = wt, %esi = start of the current dp window, | ||||
|  *                 %ebx = current lag, %edx = best product so far, | ||||
|  *                 %ecx = lag of the best product. | ||||
|  * %edi, %esi and %ebx are saved and restored. | ||||
|  */ | ||||
| .globl k6maxcc | ||||
| 	.type	 k6maxcc,@function | ||||
| k6maxcc: | ||||
| 	pushl %ebp | ||||
| 	movl %esp,%ebp | ||||
| 	pushl %edi | ||||
| 	pushl %esi | ||||
| 	pushl %ebx | ||||
| 	emms | ||||
| 	movl 8(%ebp),%edi /* edi = wt */ | ||||
| 	movl 12(%ebp),%esi /* esi = dp */ | ||||
| 	movl $0,%edx  /* will be maximum inner-product */ | ||||
| 	movl $40,%ebx /* first lag */ | ||||
| 	movl %ebx,%ecx /* will be index of max inner-product */ | ||||
| 	subl $80,%esi /* esi = &dp[-40], the window for lag 40 */ | ||||
| 	.p2align 2 | ||||
| .L41: | ||||
| 	movq (%edi),%mm0 /* words 0..3: initialises the accumulator */ | ||||
| 	movq (%esi),%mm2 | ||||
| 	pmaddwd %mm2,%mm0 | ||||
| 	ccstep 8 /* words 4..39, four per step */ | ||||
| 	ccstep 16 | ||||
| 	ccstep 24 | ||||
| 	ccstep 32 | ||||
| 	ccstep 40 | ||||
| 	ccstep 48 | ||||
| 	ccstep 56 | ||||
| 	ccstep 64 | ||||
| 	ccstep 72 | ||||
|  | ||||
| 	movq %mm0,%mm1 | ||||
| 	punpckhdq %mm0,%mm1  /* mm1 has high int32 of mm0 dup'd */ | ||||
| 	paddd %mm1,%mm0; | ||||
| 	movd %mm0,%eax  /* eax has result */ | ||||
|  | ||||
| 	cmpl %edx,%eax | ||||
| 	jle .L40 /* signed: keep the old maximum unless eax is greater */ | ||||
| 	movl %eax,%edx | ||||
| 	movl %ebx,%ecx | ||||
| 	.p2align 2 | ||||
| .L40: | ||||
| 	subl $2,%esi /* slide the dp window back one word for the next lag */ | ||||
| 	incl %ebx | ||||
| 	cmpl $120,%ebx | ||||
| 	jle .L41 /* loop while lag <= 120 */ | ||||
| 	movl 16(%ebp),%eax | ||||
| 	movw %cx,(%eax) /* *Nc_out = best lag */ | ||||
| 	movl %edx,%eax /* return value = best product */ | ||||
| 	emms | ||||
| 	popl %ebx | ||||
| 	popl %esi | ||||
| 	popl %edi | ||||
| 	leave | ||||
| 	ret | ||||
| .Lfe2: | ||||
| 	.size	 k6maxcc,.Lfe2-k6maxcc | ||||
|  | ||||
|  | ||||
| 	.align 4 | ||||
| /* long k6iprod (const short *p, const short *q, int n) | ||||
|  * | ||||
|  * Returns p[0]*q[0] + ... + p[n-1]*q[n-1], accumulated in 32 bits | ||||
|  * (paddd wraps on overflow). | ||||
|  * | ||||
|  * Stack arguments: p at 8(%ebp), q at 12(%ebp), n at 16(%ebp). | ||||
|  * Register roles: %esi walks p, %edi walks q, %edx is a limit derived | ||||
|  * from top = p + 2*n bytes, %mm0 holds two 32-bit partial sums. | ||||
|  * %esi and %edi are saved and restored; %eax, %edx, %mm0-%mm2 are | ||||
|  * overwritten.  Uses the ccstep macro defined earlier in this file. | ||||
|  * | ||||
|  * The input is consumed 16 words at a time, then 4, then 2, then a | ||||
|  * final single word when n is odd. | ||||
|  */ | ||||
| .globl k6iprod | ||||
| 	.type	 k6iprod,@function | ||||
| k6iprod: | ||||
| 	pushl %ebp | ||||
| 	movl %esp,%ebp | ||||
| 	pushl %edi | ||||
| 	pushl %esi | ||||
| 	emms | ||||
| 	pxor %mm0,%mm0 /* clear both partial sums */ | ||||
| 	movl 8(%ebp),%esi | ||||
| 	movl 12(%ebp),%edi | ||||
| 	movl 16(%ebp),%eax | ||||
| 	leal -32(%esi,%eax,2),%edx /* edx = top - 32 */ | ||||
|  | ||||
| 	cmpl %edx,%esi; ja .L202 /* fewer than 16 words in total */ | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L201: /* 16 words per iteration */ | ||||
| 	ccstep 0 | ||||
| 	ccstep 8 | ||||
| 	ccstep 16 | ||||
| 	ccstep 24 | ||||
|  | ||||
| 	addl $32,%esi | ||||
| 	addl $32,%edi | ||||
| 	cmpl %edx,%esi; jbe .L201 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L202: | ||||
| 	addl $24,%edx  /* now edx = top-8 */ | ||||
| 	cmpl %edx,%esi; ja .L205 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L203: /* 4 words per iteration */ | ||||
| 	ccstep 0 | ||||
|  | ||||
| 	addl $8,%esi | ||||
| 	addl $8,%edi | ||||
| 	cmpl %edx,%esi; jbe .L203 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L205: | ||||
| 	addl $4,%edx  /* now edx = top-4 */ | ||||
| 	cmpl %edx,%esi; ja .L207 | ||||
|  | ||||
| 	/* two words left: movd zero-fills the upper half of the register */ | ||||
| 	movd (%edi),%mm1 | ||||
| 	movd (%esi),%mm2 | ||||
| 	pmaddwd %mm2,%mm1 | ||||
| 	paddd %mm1,%mm0 | ||||
|  | ||||
| 	addl $4,%esi | ||||
| 	addl $4,%edi | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L207: | ||||
| 	addl $2,%edx  /* now edx = top-2 */ | ||||
| 	cmpl %edx,%esi; ja .L209 | ||||
|  | ||||
| 	/* one word left.  It must be zero-extended: pmaddwd also multiplies | ||||
| 	 * the second word of each operand, and a sign-extended negative value | ||||
| 	 * would put 0xffff (-1) there, adding (-1)*(-1) = 1 to the sum | ||||
| 	 * whenever both last elements are negative.  pmaddwd still treats | ||||
| 	 * the low word as signed. | ||||
| 	 */ | ||||
| 	movzwl (%edi),%eax | ||||
| 	movd %eax,%mm1 | ||||
| 	movzwl (%esi),%eax | ||||
| 	movd %eax,%mm2 | ||||
| 	pmaddwd %mm2,%mm1 | ||||
| 	paddd %mm1,%mm0 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L209: | ||||
| 	movq %mm0,%mm1 | ||||
| 	punpckhdq %mm0,%mm1  /* mm1 has high int32 of mm0 dup'd */ | ||||
| 	paddd %mm1,%mm0; | ||||
| 	movd %mm0,%eax  /* eax has result */ | ||||
|  | ||||
| 	emms | ||||
| 	popl %esi | ||||
| 	popl %edi | ||||
| 	leave | ||||
| 	ret | ||||
| .Lfe3: | ||||
| 	.size	 k6iprod,.Lfe3-k6iprod | ||||
|  | ||||
|  | ||||
| 	.align 4 | ||||
| /* void k6vsraw (short *p, int n, int bits) | ||||
|  * | ||||
|  * Rounding arithmetic right shift of p[0..n-1] in place: each word | ||||
|  * becomes (p[i] + (1 << (bits-1))) >> bits, where the addition | ||||
|  * saturates as a signed word (paddsw).  Returns immediately, without | ||||
|  * touching p or the MMX state, when bits <= 0. | ||||
|  * | ||||
|  * Stack arguments: p at 8(%ebp), n at 12(%ebp), bits at 16(%ebp). | ||||
|  * Register roles: %esi walks p, %edx is a limit derived from | ||||
|  * top = p + 2*n bytes, %mm3 = shift count, %mm2 = rounding term in | ||||
|  * every word.  %esi is saved and restored; the `ones' constant is | ||||
|  * defined further down in this file. | ||||
|  */ | ||||
| .globl k6vsraw | ||||
| 	.type	 k6vsraw,@function | ||||
| k6vsraw: | ||||
| 	pushl %ebp | ||||
| 	movl %esp,%ebp | ||||
| 	pushl %esi | ||||
| 	movl 8(%ebp),%esi | ||||
| 	movl 16(%ebp),%ecx | ||||
| 	andl %ecx,%ecx; jle .L399 /* bits <= 0: nothing to do */ | ||||
| 	movl 12(%ebp),%eax | ||||
| 	leal -16(%esi,%eax,2),%edx /* edx = top - 16 */ | ||||
| 	emms | ||||
| 	movd %ecx,%mm3 /* mm3 = shift count */ | ||||
| 	movq ones,%mm2 | ||||
| 	psllw %mm3,%mm2; psrlw $1,%mm2 /* each word = (1 << bits) >> 1 */ | ||||
| 	cmpl %edx,%esi; ja .L306 /* fewer than 8 words in total */ | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L302: /* 8 words per iteration */ | ||||
| 	movq (%esi),%mm0 | ||||
| 	movq 8(%esi),%mm1 | ||||
| 	paddsw %mm2,%mm0 | ||||
| 	psraw %mm3,%mm0; | ||||
| 	paddsw %mm2,%mm1 | ||||
| 	psraw %mm3,%mm1; | ||||
| 	movq %mm0,(%esi) | ||||
| 	movq %mm1,8(%esi) | ||||
| 	addl $16,%esi | ||||
| 	cmpl %edx,%esi | ||||
| 	jbe .L302 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L306: | ||||
| 	addl $12,%edx /* now edx = top-4 */ | ||||
| 	cmpl %edx,%esi; ja .L310 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L308: /* do up to 6 words, two at a time */ | ||||
| 	movd  (%esi),%mm0 | ||||
| 	paddsw %mm2,%mm0 | ||||
| 	psraw %mm3,%mm0; | ||||
| 	movd %mm0,(%esi) | ||||
| 	addl $4,%esi | ||||
| 	cmpl %edx,%esi | ||||
| 	jbe .L308 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L310: | ||||
| 	addl $2,%edx /* now edx = top-2 */ | ||||
| 	cmpl %edx,%esi; ja .L315 | ||||
| 	 | ||||
| 	movzwl (%esi),%eax /* one word left; only its low word is stored back */ | ||||
| 	movd %eax,%mm0 | ||||
| 	paddsw %mm2,%mm0 | ||||
| 	psraw %mm3,%mm0; | ||||
| 	movd %mm0,%eax | ||||
| 	movw %ax,(%esi) | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L315: | ||||
| 	emms | ||||
| .L399: | ||||
| 	popl %esi | ||||
| 	leave | ||||
| 	ret | ||||
| .Lfe4: | ||||
| 	.size	 k6vsraw,.Lfe4-k6vsraw | ||||
| 	 | ||||
| 	.align 4 | ||||
| /* void k6vsllw (short *p, int n, int bits) | ||||
|  * | ||||
|  * Shifts every word of p[0..n-1] left by `bits' in place (psllw, no | ||||
|  * saturation).  Returns immediately, without touching p or the MMX | ||||
|  * state, when bits <= 0. | ||||
|  * | ||||
|  * Stack arguments: p at 8(%ebp), n at 12(%ebp), bits at 16(%ebp). | ||||
|  * Register roles: %esi walks p, %edx is a limit derived from | ||||
|  * top = p + 2*n bytes, %mm3 = shift count.  %esi is saved and restored. | ||||
|  */ | ||||
| .globl k6vsllw | ||||
| 	.type	 k6vsllw,@function | ||||
| k6vsllw: | ||||
| 	pushl %ebp | ||||
| 	movl %esp,%ebp | ||||
| 	pushl %esi | ||||
| 	movl 8(%ebp),%esi | ||||
| 	movl 16(%ebp),%ecx | ||||
| 	andl %ecx,%ecx; jle .L499 /* bits <= 0: nothing to do */ | ||||
| 	movl 12(%ebp),%eax | ||||
| 	leal -16(%esi,%eax,2),%edx /* edx = top - 16 */ | ||||
| 	emms | ||||
| 	movd %ecx,%mm3 /* mm3 = shift count */ | ||||
| 	cmpl %edx,%esi; ja .L406 /* fewer than 8 words in total */ | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L402: /* 8 words per iteration */ | ||||
| 	movq (%esi),%mm0 | ||||
| 	movq 8(%esi),%mm1 | ||||
| 	psllw %mm3,%mm0; | ||||
| 	psllw %mm3,%mm1; | ||||
| 	movq %mm0,(%esi) | ||||
| 	movq %mm1,8(%esi) | ||||
| 	addl $16,%esi | ||||
| 	cmpl %edx,%esi | ||||
| 	jbe .L402 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L406: | ||||
| 	addl $12,%edx /* now edx = top-4 */ | ||||
| 	cmpl %edx,%esi; ja .L410 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L408: /* do up to 6 words, two at a time */ | ||||
| 	movd (%esi),%mm0 | ||||
| 	psllw %mm3,%mm0; | ||||
| 	movd %mm0,(%esi) | ||||
| 	addl $4,%esi | ||||
| 	cmpl %edx,%esi | ||||
| 	jbe .L408 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L410: | ||||
| 	addl $2,%edx /* now edx = top-2 */ | ||||
| 	cmpl %edx,%esi; ja .L415 | ||||
| 	 | ||||
| 	movzwl (%esi),%eax /* one word left; only its low word is stored back */ | ||||
| 	movd %eax,%mm0 | ||||
| 	psllw %mm3,%mm0; | ||||
| 	movd %mm0,%eax | ||||
| 	movw %ax,(%esi) | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L415: | ||||
| 	emms | ||||
| .L499: | ||||
| 	popl %esi | ||||
| 	leave | ||||
| 	ret | ||||
| .Lfe5: | ||||
| 	.size	 k6vsllw,.Lfe5-k6vsllw | ||||
|  | ||||
|  | ||||
| .section	.rodata | ||||
| 	.align 4 | ||||
| 	.type	 extremes,@object | ||||
| 	.size	 extremes,8 | ||||
| extremes: | ||||
| 	.long 0x80008000 /* two words of -32768: k6maxmin's starting maximum */ | ||||
| 	.long 0x7fff7fff /* two words of +32767: k6maxmin's starting minimum */ | ||||
| 	.type	 ones,@object | ||||
| 	.size	 ones,8 | ||||
| ones: | ||||
| 	.long 0x00010001 /* four words of 1: k6vsraw shifts these into its */ | ||||
| 	.long 0x00010001 /* per-word rounding term */ | ||||
|  | ||||
| .text | ||||
| 	.align 4 | ||||
| /* long k6maxmin (const short *p, int n, short *out) | ||||
|  * | ||||
|  * Scans p[0..n-1] for its signed maximum and minimum, stores them in | ||||
|  * out[0] and out[1] when out is non-NULL, and returns the larger of | ||||
|  * max and -min as a 32-bit value. | ||||
|  * | ||||
|  * Stack arguments: p at 8(%ebp), n at 12(%ebp), out at 16(%ebp). | ||||
|  * Register roles: %esi walks p, %edx is a limit derived from | ||||
|  * top = p + 2*n bytes, %mm0 holds running maxima and %mm1 running | ||||
|  * minima word by word.  Four words are handled per step while possible, | ||||
|  * then two, then a final single word.  Each per-word select is built | ||||
|  * from pcmpgtw/pand/pandn/por.  %esi is saved and restored. | ||||
|  */ | ||||
| .globl k6maxmin | ||||
| 	.type	 k6maxmin,@function | ||||
| k6maxmin: | ||||
| 	pushl %ebp | ||||
| 	movl %esp,%ebp | ||||
| 	pushl %esi | ||||
| 	emms | ||||
| 	movl 8(%ebp),%esi | ||||
| 	movl 12(%ebp),%eax | ||||
| 	leal -8(%esi,%eax,2),%edx /* edx = top - 8 */ | ||||
|  | ||||
| 	cmpl %edx,%esi | ||||
| 	jbe .L52 /* at least 4 words: seed max/min from the data itself */ | ||||
| 	movd extremes,%mm0 /* fewer than 4 words: seed max with -32768 */ | ||||
| 	movd extremes+4,%mm1 /* and min with +32767 */ | ||||
| 	jmp .L58 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L52: | ||||
| 	movq (%esi),%mm0   /* mm0 will be max's */ | ||||
| 	movq %mm0,%mm1     /* mm1 will be min's */ | ||||
| 	addl $8,%esi | ||||
| 	cmpl %edx,%esi | ||||
| 	ja .L56 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L54: | ||||
| 	movq (%esi),%mm2 | ||||
|  | ||||
| 	movq %mm2,%mm3 | ||||
| 	pcmpgtw %mm0,%mm3  /* mm3 is bitmask for words where mm2 > mm0 */  | ||||
| 	movq %mm3,%mm4 | ||||
| 	pand %mm2,%mm3     /* mm3 is mm2 masked to new max's */ | ||||
| 	pandn %mm0,%mm4    /* mm4 is mm0 masked to its max's */ | ||||
| 	por %mm3,%mm4 | ||||
| 	movq %mm4,%mm0     /* now mm0 is updated max's */ | ||||
| 	 | ||||
| 	movq %mm1,%mm3 | ||||
| 	pcmpgtw %mm2,%mm3  /* mm3 is bitmask for words where mm2 < mm1 */  | ||||
| 	pand %mm3,%mm2     /* mm2 is mm2 masked to new min's */ | ||||
| 	pandn %mm1,%mm3    /* mm3 is mm1 masked to its min's */ | ||||
| 	por %mm3,%mm2 | ||||
| 	movq %mm2,%mm1     /* now mm1 is updated min's */ | ||||
|  | ||||
| 	addl $8,%esi | ||||
| 	cmpl %edx,%esi | ||||
| 	jbe .L54 | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L56: /* merge down the 4-word max/mins to lower 2 words */ | ||||
|  | ||||
| 	movq %mm0,%mm2 | ||||
| 	psrlq $32,%mm2 /* upper two maxima moved down next to the lower two */ | ||||
| 	movq %mm2,%mm3 | ||||
| 	pcmpgtw %mm0,%mm3  /* mm3 is bitmask for words where mm2 > mm0 */  | ||||
| 	pand %mm3,%mm2     /* mm2 is mm2 masked to new max's */ | ||||
| 	pandn %mm0,%mm3    /* mm3 is mm0 masked to its max's */ | ||||
| 	por %mm3,%mm2 | ||||
| 	movq %mm2,%mm0     /* now mm0 is updated max's */ | ||||
|  | ||||
| 	movq %mm1,%mm2 | ||||
| 	psrlq $32,%mm2 /* upper two minima moved down next to the lower two */ | ||||
| 	movq %mm1,%mm3 | ||||
| 	pcmpgtw %mm2,%mm3  /* mm3 is bitmask for words where mm2 < mm1 */  | ||||
| 	pand %mm3,%mm2     /* mm2 is mm2 masked to new min's */ | ||||
| 	pandn %mm1,%mm3    /* mm3 is mm1 masked to its min's */ | ||||
| 	por %mm3,%mm2 | ||||
| 	movq %mm2,%mm1     /* now mm1 is updated min's */ | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L58: | ||||
| 	addl $4,%edx       /* now dx = top-4 */ | ||||
| 	cmpl %edx,%esi | ||||
| 	ja .L62 | ||||
| 	/* here, there are >= 2 words of input remaining */ | ||||
| 	movd (%esi),%mm2 | ||||
|  | ||||
| 	movq %mm2,%mm3 | ||||
| 	pcmpgtw %mm0,%mm3  /* mm3 is bitmask for words where mm2 > mm0 */  | ||||
| 	movq %mm3,%mm4 | ||||
| 	pand %mm2,%mm3     /* mm3 is mm2 masked to new max's */ | ||||
| 	pandn %mm0,%mm4    /* mm4 is mm0 masked to its max's */ | ||||
| 	por %mm3,%mm4 | ||||
| 	movq %mm4,%mm0     /* now mm0 is updated max's */ | ||||
| 	 | ||||
| 	movq %mm1,%mm3 | ||||
| 	pcmpgtw %mm2,%mm3  /* mm3 is bitmask for words where mm2 < mm1 */  | ||||
| 	pand %mm3,%mm2     /* mm2 is mm2 masked to new min's */ | ||||
| 	pandn %mm1,%mm3    /* mm3 is mm1 masked to its min's */ | ||||
| 	por %mm3,%mm2 | ||||
| 	movq %mm2,%mm1     /* now mm1 is updated min's */ | ||||
|  | ||||
| 	addl $4,%esi | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L62: | ||||
| 	/* merge down the 2-word max/mins to 1 word */ | ||||
|  | ||||
| 	movq %mm0,%mm2 | ||||
| 	psrlq $16,%mm2 /* second maximum moved down next to the first */ | ||||
| 	movq %mm2,%mm3 | ||||
| 	pcmpgtw %mm0,%mm3  /* mm3 is bitmask for words where mm2 > mm0 */  | ||||
| 	pand %mm3,%mm2     /* mm2 is mm2 masked to new max's */ | ||||
| 	pandn %mm0,%mm3    /* mm3 is mm0 masked to its max's */ | ||||
| 	por %mm3,%mm2 | ||||
| 	movd %mm2,%ecx     /* cx is max so far */ | ||||
|  | ||||
| 	movq %mm1,%mm2 | ||||
| 	psrlq $16,%mm2 /* second minimum moved down next to the first */ | ||||
| 	movq %mm1,%mm3 | ||||
| 	pcmpgtw %mm2,%mm3  /* mm3 is bitmask for words where mm2 < mm1 */  | ||||
| 	pand %mm3,%mm2     /* mm2 is mm2 masked to new min's */ | ||||
| 	pandn %mm1,%mm3    /* mm3 is mm1 masked to its min's */ | ||||
| 	por %mm3,%mm2 | ||||
| 	movd %mm2,%eax     /* ax is min so far */ | ||||
| 	 | ||||
| 	addl $2,%edx       /* now dx = top-2 */ | ||||
| 	cmpl %edx,%esi | ||||
| 	ja .L65 | ||||
|  | ||||
| 	/* here, there is one word of input left */ | ||||
| 	cmpw (%esi),%cx | ||||
| 	jge .L64 /* max already >= last word */ | ||||
| 	movw (%esi),%cx | ||||
| 	.p2align 2 | ||||
| .L64: | ||||
| 	cmpw (%esi),%ax | ||||
| 	jle .L65 /* min already <= last word */ | ||||
| 	movw (%esi),%ax | ||||
|  | ||||
| 	.p2align 2 | ||||
| .L65:  /* (finally!) cx is the max, ax the min */ | ||||
| 	movswl %cx,%ecx /* widen both to signed 32 bits */ | ||||
| 	movswl %ax,%eax | ||||
|  | ||||
| 	movl 16(%ebp),%edx /* ptr to output max,min vals */ | ||||
| 	andl %edx,%edx; jz .L77 /* out == NULL: skip the stores */ | ||||
| 	movw %cx,(%edx)  /* max */ | ||||
| 	movw %ax,2(%edx) /* min */ | ||||
| 	.p2align 2 | ||||
| .L77: | ||||
| 	/* now calculate max absolute val */ | ||||
| 	negl %eax /* eax = -min */ | ||||
| 	cmpl %ecx,%eax | ||||
| 	jge .L81 /* -min >= max: return -min */ | ||||
| 	movl %ecx,%eax /* otherwise return max */ | ||||
| 	.p2align 2 | ||||
| .L81: | ||||
| 	emms | ||||
| 	popl %esi | ||||
| 	leave | ||||
| 	ret | ||||
| .Lfe6: | ||||
| 	.size	 k6maxmin,.Lfe6-k6maxmin | ||||
|  | ||||
| /* void Short_term_analysis_filteringx (short *u0, const short *rp0, int kn, short *s) | ||||
|  * | ||||
|  * For each of the kn samples in s, runs the sample through 8 stages. | ||||
|  * With di = sav = *s on entry, stage i (i = 0..7) does | ||||
|  *     ui = u0[i];  u0[i] = sav; | ||||
|  *     sav = ui + round(rp0[i]*di);  di = di + round(rp0[i]*ui); | ||||
|  * where round(x) = (x + 0x4000) >> 15 saturated to a word and both | ||||
|  * additions saturate (paddsw).  The final di is written back to *s. | ||||
|  * | ||||
|  * Stack arguments and locals are named by the .equiv offsets below | ||||
|  * (pm_* are arguments, lv_* are locals relative to %ebp). | ||||
|  * Register roles: %edx = current s pointer, %edi = current u pointer, | ||||
|  * %esi = current entry of the doubled rp table, %eax = (di,sav) packed | ||||
|  * as high/low word, %mm4 = rounding constant in both dwords. | ||||
|  * %edi and %esi are saved and restored. | ||||
|  */ | ||||
| 	.equiv pm_u0,8 | ||||
| 	.equiv pm_rp0,12 | ||||
| 	.equiv pm_kn,16 | ||||
| 	.equiv pm_s,20 | ||||
| 	.equiv lv_u_top,-4 | ||||
| 	.equiv lv_s_top,-8 | ||||
| 	.equiv lv_rp,-40 /* local version of rp0 with each word twice */ | ||||
| 	.align 4 | ||||
| .globl Short_term_analysis_filteringx | ||||
| 	.type	 Short_term_analysis_filteringx,@function | ||||
| Short_term_analysis_filteringx: | ||||
| 	pushl %ebp | ||||
| 	movl %esp,%ebp | ||||
| 	subl $40,%esp /* 32 bytes for lv_rp plus the two pointer locals */ | ||||
| 	pushl %edi | ||||
| 	pushl %esi | ||||
|  | ||||
| 	movl pm_rp0(%ebp),%esi; | ||||
| 	leal lv_rp(%ebp),%edi; | ||||
| 	cld | ||||
| 	lodsw; stosw; stosw /* lv_rp gets each of rp0[0..7] stored twice */ | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	emms | ||||
| 	movl $0x4000,%eax; | ||||
| 	movd %eax,%mm4; | ||||
| 	punpckldq %mm4,%mm4 /* (0x00004000,0x00004000) for rounding dword product pairs */ | ||||
|  | ||||
| 	movl pm_u0(%ebp),%eax | ||||
| 	addl $16,%eax /* 8 words past u0 */ | ||||
| 	movl %eax,lv_u_top(%ebp) /* UTOP */ | ||||
| 	movl pm_s(%ebp),%edx  /* edx is local s ptr throughout below */ | ||||
| 	movl pm_kn(%ebp),%eax | ||||
| 	leal (%edx,%eax,2),%eax /* eax = s + kn words */ | ||||
| 	movl %eax,lv_s_top(%ebp) | ||||
| 	cmpl %eax,%edx | ||||
| 	jae .L179 /* kn <= 0: nothing to filter */ | ||||
| 	.p2align 2 | ||||
| .L181: /* once per sample */ | ||||
| 	leal lv_rp(%ebp),%esi  /* RP */ | ||||
| 	movl pm_u0(%ebp),%edi  /* U  */ | ||||
| 	movw (%edx),%ax /* (0,DI) */ | ||||
| 	roll $16,%eax | ||||
| 	movw (%edx),%ax /* (DI,DI) */ | ||||
| 	.p2align 2 | ||||
| .L185: /* RP is %esi */ | ||||
| 	movl %eax,%ecx /* keep the previous low word (sav) in cx */ | ||||
| 	movw (%edi),%ax  /* (DI,U) */ | ||||
| 	movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */ | ||||
| 	movw %cx,(%edi) /* u[i] = sav */ | ||||
|  | ||||
| 	movd %eax,%mm2   /* mm2 is (0,0,DI,U) */ | ||||
| 	rorl $16,%eax  | ||||
| 	movd %eax,%mm1   /* mm1 is (0,0,U,DI) */ | ||||
|  | ||||
| 	movq %mm1,%mm0 | ||||
| 	pmullw %mm3,%mm0 /* low halves of the two products */ | ||||
| 	pmulhw %mm3,%mm1 /* high halves of the two products */ | ||||
| 	punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */ | ||||
| 	paddd %mm4,%mm0     /* mm4 is 0x00004000,0x00004000 */ | ||||
| 	psrad $15,%mm0      /* (RP*U,RP*DI) adjusted */ | ||||
| 	packssdw %mm0,%mm0  /* (*,*,RP*U,RP*DI) adjusted and saturated to word */ | ||||
| 	paddsw %mm2,%mm0    /* mm0 is (?,?, DI', U') */ | ||||
| 	movd %mm0,%eax      /* (DI,U') */ | ||||
|  | ||||
| 	addl $2,%edi /* next u word */ | ||||
| 	addl $4,%esi /* next doubled rp entry */ | ||||
| 	cmpl lv_u_top(%ebp),%edi | ||||
| 	jb .L185 | ||||
|  | ||||
| 	rorl $16,%eax | ||||
| 	movw %ax,(%edx) /* last DI goes to *s */ | ||||
| 	addl $2,%edx    /* next s */ | ||||
| 	cmpl lv_s_top(%ebp),%edx | ||||
| 	jb .L181 | ||||
| 	.p2align 2 | ||||
| .L179: | ||||
| 	emms | ||||
| 	popl %esi | ||||
| 	popl %edi | ||||
| 	leave | ||||
| 	ret | ||||
| .Lfe7: | ||||
| 	.size	 Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx | ||||
|  | ||||
| .end /* GNU as processes nothing past .end: the rest of this file is not assembled */ | ||||
|  | ||||
| /* 'as' macros seem to be case-insensitive | ||||
|  * | ||||
|  * STEP n: one filter stage for the register-resident variant below | ||||
|  * (invoked there in lower case as `step').  n is the byte offset of | ||||
|  * the stage's doubled rp entry from %esi.  Expects the 8 u words | ||||
|  * queued in %mm4 (low) and %mm5 (high), the rounding constant in %mm6, | ||||
|  * and DI in the high word of %eax; the low word of %mm0 is shifted in | ||||
|  * at the top of the queue.  Overwrites %eax, %ecx and %mm0-%mm3. | ||||
|  */ | ||||
| .macro STEP n | ||||
| .if \n | ||||
| 	movd \n(%esi),%mm3 /* mm3 is (0,0,RP,RP) */ | ||||
| .else | ||||
| 	movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */ | ||||
| .endif | ||||
| 	movq %mm5,%mm1; | ||||
| 	movd %mm4,%ecx; movw %cx,%ax  /* (DI,U) */ | ||||
| 	psllq $48,%mm1; psrlq $16,%mm4; por %mm1,%mm4 /* mm4 drops its low word, takes mm5's low word on top */ | ||||
| 	psllq $48,%mm0; psrlq $16,%mm5; por %mm0,%mm5 /* mm5 drops its low word, takes mm0's low word on top */ | ||||
|  | ||||
| 	movd %eax,%mm2   /* mm2 is (0,0,DI,U) */ | ||||
| 	rorl $16,%eax  | ||||
| 	movd %eax,%mm1   /* mm1 is (0,0,U,DI) */ | ||||
|  | ||||
| 	movq %mm1,%mm0 | ||||
| 	pmullw %mm3,%mm0 | ||||
| 	pmulhw %mm3,%mm1 | ||||
| 	punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */ | ||||
| 	paddd %mm6,%mm0     /* mm6 is 0x00004000,0x00004000 */ | ||||
| 	psrad $15,%mm0      /* (RP*U,RP*DI) adjusted */ | ||||
| 	packssdw %mm0,%mm0  /* (*,*,RP*U,RP*DI) adjusted and saturated to word */ | ||||
| 	paddsw %mm2,%mm0    /* mm0 is (?,?, DI', U') */ | ||||
| 	movd %mm0,%eax      /* (DI,U') */ | ||||
| .endm | ||||
|  | ||||
| /* void Short_term_analysis_filteringx (short *u0, const short *rp0, int kn, short *s) | ||||
|  * | ||||
|  * Alternative version that keeps the 8 u words in %mm4/%mm5 for the | ||||
|  * whole call and writes them back to u0 only at the end.  It sits | ||||
|  * after the .end directive and reuses the symbol and the local labels | ||||
|  * of the version above, so it is reference text, not assembled code. | ||||
|  * | ||||
|  * Register roles: %ebx = u0, %edx = current s pointer, %esi = current | ||||
|  * entry of the doubled rp table, %mm6 = rounding constant. | ||||
|  * NOTE(review): four `step' expansions run per pass of .L185 and the | ||||
|  * pass repeats until %esi reaches lv_rp_top -- presumably two passes | ||||
|  * cover the 8 stages; confirm before reviving this code. | ||||
|  */ | ||||
| 	.equiv pm_u0,8 | ||||
| 	.equiv pm_rp0,12 | ||||
| 	.equiv pm_kn,16 | ||||
| 	.equiv pm_s,20 | ||||
| 	.equiv lv_rp_top,-4 | ||||
| 	.equiv lv_s_top,-8 | ||||
| 	.equiv lv_rp,-40 /* local version of rp0 with each word twice */ | ||||
| 	.align 4 | ||||
| .globl Short_term_analysis_filteringx | ||||
| 	.type	 Short_term_analysis_filteringx,@function | ||||
| Short_term_analysis_filteringx: | ||||
| 	pushl %ebp | ||||
| 	movl %esp,%ebp | ||||
| 	subl $56,%esp | ||||
| 	pushl %edi | ||||
| 	pushl %esi | ||||
| 	pushl %ebx | ||||
|  | ||||
| 	movl pm_rp0(%ebp),%esi; | ||||
| 	leal lv_rp(%ebp),%edi; | ||||
| 	cld | ||||
| 	lodsw; stosw; stosw /* lv_rp gets each of rp0[0..7] stored twice */ | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	lodsw; stosw; stosw | ||||
| 	movl %edi,lv_rp_top(%ebp) /* end of the doubled rp table */ | ||||
| 	emms | ||||
|  | ||||
| 	movl $0x4000,%eax; | ||||
| 	movd %eax,%mm6; | ||||
| 	punpckldq %mm6,%mm6 /* (0x00004000,0x00004000) for rounding dword product pairs */ | ||||
|  | ||||
| 	movl pm_u0(%ebp),%ebx | ||||
| 	movq (%ebx),%mm4; movq 8(%ebx),%mm5 /* the 8 u's */ | ||||
| 	movl pm_s(%ebp),%edx  /* edx is local s ptr throughout below */ | ||||
| 	movl pm_kn(%ebp),%eax | ||||
| 	leal (%edx,%eax,2),%eax /* eax = s + kn words */ | ||||
| 	movl %eax,lv_s_top(%ebp) | ||||
| 	cmpl %eax,%edx | ||||
| 	jae .L179 | ||||
| 	.p2align 2 | ||||
| .L181: /* once per sample */ | ||||
| 	leal lv_rp(%ebp),%esi  /* RP */ | ||||
| 	movw (%edx),%ax /* (0,DI) */ | ||||
| 	roll $16,%eax | ||||
| 	movw (%edx),%ax /* (DI,DI) */ | ||||
| 	movd %eax,%mm0 | ||||
| 	.p2align 2 | ||||
| .L185: /* RP is %esi */ | ||||
| 	step 0 | ||||
| 	step 4 | ||||
| 	step 8 | ||||
| 	step 12 | ||||
| /* | ||||
| 	step 16 | ||||
| 	step 20 | ||||
| 	step 24 | ||||
| 	step 28 | ||||
| */ | ||||
| 	addl $16,%esi /* advance past the four entries just used */ | ||||
| 	cmpl lv_rp_top(%ebp),%esi  | ||||
| 	jb .L185 | ||||
|  | ||||
| 	rorl $16,%eax | ||||
| 	movw %ax,(%edx) /* last DI goes to *s */ | ||||
| 	addl $2,%edx    /* next s */ | ||||
| 	cmpl lv_s_top(%ebp),%edx | ||||
| 	jb .L181 | ||||
| .L179: | ||||
| 	movq %mm4,(%ebx); movq %mm5,8(%ebx) /* the 8 u's */ | ||||
| 	emms | ||||
| 	popl %ebx | ||||
| 	popl %esi | ||||
| 	popl %edi | ||||
| 	leave | ||||
| 	ret | ||||
| .Lfe7: | ||||
| 	.size	 Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx | ||||
| 	.ident	"GCC: (GNU) 2.95.2 19991109 (Debian GNU/Linux)" | ||||
		Reference in New Issue
	
	Block a user