campo-sirio/gfm/mgmn.asm
alex ba237a9d91 Patch level : no patch
Files correlati     :
Ricompilazione Demo : [ ]
Commento            :
Aggiunti i sorgenti per Greenleaf Math Library (gfm.dll)


git-svn-id: svn://10.65.10.50/trunk@10079 c028cbd2-c16b-5b4b-a496-9718f37d4682
2002-02-26 12:19:02 +00:00

314 lines
10 KiB
NASM
Executable File

; int _MulUnsArrByUnsArr(src1,src2,dst,m,n,tmp1)
;
; ARGUMENT
; unsigned *src1[m],*src2[n]; where (m,n)<=5
; unsigned *dst[10]; destination is unsigned [10]
; int m,n; m=#ints in src1, n=#ints in src2
; int tmp1; where tmp1 is used as a temp multiplier for each loop
;
; DESCRIPTION
;
; Multiplies multiplier by multiplicand giving dst. Src1 and src2 are
; each at most 80 bits wide, so the 80-bit x 80-bit product is computed
; to 160 bits. The number of ints in src1 and src2 are examined to
; determine which is the multiplier, ie. which has the fewest number of
; ints. If they have the same number of ints, then src1 will be the
; multiplier. As each int is multiplied to obtain the partial product,
; it is added to the dst and any carries are added to succeeding column
; locations in the dst array.
;
; SIDE EFFECTS
; Src1 and src2 remain unchanged. Dst IS ZEROED PRIOR TO ADD. NOTE THAT
; NEITHER src1 NOR src2 CAN HAVE THE MSB OF THE HIGH-ORDER INT SET, ie.
; neither number can be negative. If either is, the results are indeterminate.
;
; RETURNS
; None.
;
; AUTHOR
; Andy Anderson 04-Jun-88 1030
; Copyright (C) 1987-90 Greenleaf Software Inc. All Rights Reserved.
;
; MODIFICATIONS
;
;
.SFCOND
include model.h
include prologue.h
include gm.equ
; partial products for intermediate results
ReferVar wGMTemp1,<cWord>
dseg _gm
endds
pseg gmath
;
;
; if large memory model then:
;
; parm1_ = ptr to multiplier
; parm3_ = ptr to multiplicand
; parm5_ = destination segment ptr } pointer to unsigned[10]
; destination offset ptr } destination
; parm7_ = # of 16-bit int's in multiplier
; parm8_ = # of 16-bit int's in multiplicand
; parm9_ = temp for current multiplier
;
; if small memory model then:
; parm1_ = ptr to multiplier
; parm2_ = ptr to multiplicand
; parm3_ = ptr to unsigned[10] dst
; parm4_ = # of 16-bit int's in MULTIPLIER (a1,...,an)
; parm5_ = # of 16-bit int's in MULTIPLICAND(b1,...,bn)
; parm6_ = temp for current multiplier
;
;
; Then set up and do the first set of mults. Note that the # of
; 'bigloop' iterations correspond to the number of 16-bit digits
; in the multiplier and the 'mlp' loop iterations correspond to the
; # ints in the multiplicand. [these line up if you set tab=5].
; Algorithm(32-bitx32-bit):
;                                               b2    b1
;                                               a2    a1
;                                              __________
;
;| first                                        p1h   p1l
;|iteration                               p2h   p2l
;|2nd                                     p3h   p3l
;|iter                              p4h   p4l
;                                   ______________________
;                                   c4    c3    c2    c1
;
;or---> Algorithm(64-bitx32-bit):
;                                   b4    b3    b2    b1
;                                               a2    a1
;                                   _____________________
;| first                                        p1h   p1l
;|iteration                               p2h   p2l
;|                                  p3h   p3l
;|                            p4h   p4l
;----
;|                                        p5h   p5l
;| 2nd                              p6h   p6l
;|iter                        p7h   p7l
;|                      p8h   p8l
;-----
;___________________________________________________________
;           0     0     c6    c5    c4    c3    c2    c1
;
;or---> Algorithm(64-bitx64-bit):
; b4 b3 b2 b1
; a4 a3 a2 a1
; __________________
;| first p1h p1l
;|iteration p2h p2l
;| p3h p3l
;| p4h p4l
;----
;| p5h p5l
;| 2nd p6h p6l
;|iter p7h p7l
;| p8h p8l
;-----
;| third p1h p1l
;|iteration p2h p2l
;| p3h p3l
;| p4h p4l
;----
;| p5h p5l
;| 4th p6h p6l
;|iter p7h p7l
;| p8h p8l
;-----
;| fifth p9h p9l
;|iteration p10h p10l
;| p11h p11l
;| p12h p12l
;----
;| 6th p13h p13l
;| p14h p14l
;| p15h p15l
;|p16h p16l
;-----
;_________________________________________________________________
; c8 c7 c6 c5 c4 c3 c2 c1
;
; As each multiply is done, the partial product is added to
; the destination 'dst'
;
;
;
;
; _MulUnsArrByUnsArr: dst[10] = src1[m] * src2[n], 16-bit ints, least
; significant int first.  The operand with fewer ints is used as the
; multiplier (outer loop); src1 is used when the counts are equal.
;
; Register roles inside the multiply loops:
;   si (ds:si in large data)  -> multiplicand, lsd first
;   di (es:di in large data)  -> dst column of the current multiplier int
;   bx        = byte offset of the current multiplicand int (0,2,4,...)
;   cx        = multiplicand ints still to do in the current row
;   wGMTemp1  = multiplier ints still to do
;
; dst is always zeroed first.  If the smaller int count is <= 0 the
; routine returns right after the zero-fill (dst = 0) instead of running
; the loops with a wrapped counter.  No meaningful value is left in ax.
;
cproc _MulUnsArrByUnsArr,,_mgmn
if _LDATA
        push    ds
        push    es
        mov     ax,parm7_               ; ax = m
        cmp     ax,parm8_               ; compare with n (signed)
        jle     nochg                   ; m <= n: src1 is already the multiplier
        xchg    ax,parm8_               ; swap m and n;
        mov     parm7_,ax               ; ax = # multiplier ints (the smaller)
        mov     bx,parm1_               ; swap both words of the two source
        xchg    bx,parm3_               ; pointers so that the multiplier
        mov     parm1_,bx               ; stays in parm1_
        mov     bx,parm2_
        xchg    bx,parm4_
        mov     parm2_,bx
else
        mov     ax,parm4_               ; ax = m
        cmp     ax,parm5_               ; compare with n (signed)
        jle     nochg                   ; m <= n: src1 is already the multiplier
        xchg    ax,parm5_               ; swap m and n;
        mov     parm4_,ax               ; ax = # multiplier ints (the smaller)
        mov     bx,parm1_               ; swap the source pointers so that
        xchg    bx,parm2_               ; the multiplier stays in parm1_
        mov     parm1_,bx
endif
nochg:
        mov     dx,ax                   ; keep the multiplier count for the
                                        ; guard below (ax is reused; dx is
                                        ; not touched by the zero-fill)
ifdef DSNOTHING
        mov     bx,seg wGMTemp1
        mov     ds,bx
endif
        mov     wGMTemp1,ax             ; outer-loop counter lives in a global
;
; Clear the destination: store one zero word, then let an overlapping
; forward word copy (source = dst, target = dst+2) smear it over the
; remaining 9 words.
;
if _LDATA
        les     di,parm5_
        add     di,2                    ; es:di -> dst[1]
        lds     si,parm5_               ; ds:si -> dst[0]
        xor     ax,ax
        mov     [si],ax                 ; dst[0] = 0
        mov     cx,9
        cld                             ; the copy must run upward
        rep     movsw                   ; zero-fill dst[1..9]
        les     di,parm5_               ; es:di -> start of dst again
else
        push    es                      ; save entry es
        mov     ax,ds
        mov     es,ax                   ; movsw writes through es: make es = ds
        mov     di,parm3_
        mov     si,di                   ; si -> dst[0]
        add     di,2                    ; di -> dst[1]
        xor     ax,ax
        mov     [si],ax                 ; dst[0] = 0
        mov     cx,9
        cld                             ; the copy must run upward
        rep     movsw                   ; zero-fill dst[1..9]
        mov     di,parm3_               ; di -> start of dst again
        pop     es                      ; restore es
endif
;
; The multiplier count is the smaller of m and n, so if it is positive
; both counts are.  Otherwise the product is the zero already stored.
;
        cmp     dx,0
        jle     done
;
; Outer loop: one pass per 16-bit int of the multiplier.
;
bigloop:
if _LDATA
        mov     cx,parm8_               ; # ints in multiplicand
        lds     si,parm1_               ; ds:si -> current multiplier int
        mov     ax,[si]
        mov     parm9_,ax               ; stash it as the row multiplier
        lds     si,parm3_               ; ds:si -> lsd of multiplicand
else
        mov     cx,parm5_               ; # ints in multiplicand
        mov     si,parm1_               ; si -> current multiplier int
        mov     ax,[si]
        mov     parm6_,ax               ; stash it as the row multiplier
        mov     si,parm2_               ; si -> lsd of multiplicand
endif
        xor     bx,bx                   ; start at multiplicand int 0
;
; Inner loop: multiply each multiplicand int by the row multiplier and
; add the 32-bit partial product into dst at column di+bx.
;
mlp:
if _LDATA
        mov     ax,[si][bx]             ; next multiplicand int
        mul     Word Ptr parm9_         ; dx:ax = partial product
        add     es:[di+bx],ax           ; low word into this column
        adc     es:[di+bx+2],dx         ; high word (+carry) into the next
else
        mov     ax,[si+bx]              ; next multiplicand int
        mul     Word Ptr parm6_         ; dx:ax = partial product
        add     [di+bx],ax              ; low word into this column
        adc     [di+bx+2],dx            ; high word (+carry) into the next
endif
        jc      carry                   ; carry out of the high word: ripple it
nocary:
        add     bx,2                    ; next multiplicand int / dst column
        loop    mlp
;
; Row finished.  Count down the multiplier ints; dec sets ZF when the
; counter reaches zero.
;
if _LDATA
        mov     bx,ds                   ; ds currently addresses a source array
    ifndef DSNOTHING
        mov     ax, seg DGROUP
    else
        mov     ax,seg wGMTemp1
    endif
        mov     ds,ax                   ; address the global counter
        dec     wGMTemp1
        mov     ds,bx                   ; restore ds (mov leaves ZF intact)
else
        dec     wGMTemp1
endif
        je      done                    ; all multiplier ints consumed
        add     Word Ptr parm1_,2       ; advance to next multiplier int
        add     di,2                    ; next row starts one column higher
        jmp     short bigloop
;
; Here to propagate a carry out of column di+bx+2.  The original design
; note for this routine: since non-negative numbers are guaranteed, at
; most 4 further adds to dst can be needed and no carry can leave the
; most significant word, so the four adds are unrolled without changing
; di or bx.
;
carry:
        mov     ax,0000h                ; add only the carry.  Must be MOV:
                                        ; XOR/SUB would clear the pending CF
if _LDATA
        adc     es:[di+bx+4],ax
        jnc     nocary                  ; carry absorbed: back to the multiply
        adc     es:[di+bx+6],ax
        jnc     nocary
        adc     es:[di+bx+8],ax
        jnc     nocary
        adc     es:[di+bx+10],ax        ; last possible column
else
        adc     [di+bx+4],ax
        jnc     nocary                  ; carry absorbed: back to the multiply
        adc     [di+bx+6],ax
        jnc     nocary
        adc     [di+bx+8],ax
        jnc     nocary
        adc     [di+bx+10],ax           ; last possible column
endif
        jmp     short nocary            ; no carry can remain here
done:
if _LDATA
        pop     es                      ; balance the pushes made on entry
        pop     ds
endif
cproce
endps
END