campo-sirio/gfm/mgmn.asm

 ; int  _MulUnsArrByUnsArr(src1,src2,dst,m,n,tmp1)
 ;
 ; ARGUMENT
 ;      unsigned *src1[m],*src2[n]; where (m,n)<=5
 ;      unsigned *dst[10];      destination is unsigned [10]
 ;      int     m,n;    m=#ints in src1, n=#ints in src2
 ;      int     tmp1;  where tmp1 is used as a temp multiplier for each loop
 ;
 ; DESCRIPTION
 ;
 ;      Multiplies multiplier by multiplicand giving dst. Src1 and src2 are
 ;   each up to 80 bits wide (5 ints); the 80-bit x 80-bit product is
 ;   computed to 160 bits. The number of ints in src1
 ;   and src2 are examined to determine which is the multiplier, ie.
 ;   which has the fewest number of ints. If they have the same
 ;   number of ints, then src1 will be the multiplier. As each int
 ;   is multiplied to obtain the partial product, it is added to the dst
 ;   and any carries are added to succeeding column locations in the
 ;   dst array.
 ;
 ; SIDE EFFECTS
 ;      Src1 and src2 remain unchanged. Dst IS ZEROED PRIOR TO ADD. NOTE THAT
 ;  NEITHER pSrc1 or pSrc2 CAN HAVE THE MSB OF THE HIGH-ORDER INT SET, ie.
 ;  neither number can be negative. If it is, the results are indeterminate.
 ;
 ; RETURNS
 ;      None.
 ;
 ; AUTHOR
 ;  Andy Anderson   04-Jun-88           1030
 ;   Copyright (C) 1987-90 Greenleaf Software Inc.  All Rights Reserved.
 ;
 ; MODIFICATIONS
 ;
 ;
        .SFCOND

        include model.h
        include prologue.h
        include gm.equ


;  partial products for intermediate results

        ReferVar        wGMTemp1,<cWord>

        dseg    _gm
        endds

        pseg    gmath
;
;
;  if large memory model then:
;
;       parm1_ = ptr to multiplier
;       parm3_ = ptr to multiplicand
;       parm5_ = destination segment ptr } pointer to unsigned[10]
;                destination offset ptr  }  destination
;       parm7_ = # of 16-bit int's in multiplier
;       parm8_ = # of 16-bit int's in multiplicand
;       parm9_ = temp for current multiplier
;
;  if small memory model then:
;       parm1_ = ptr to multiplier
;       parm2_ = ptr to multiplicand
;       parm3_ = ptr to unsigned[10] dst
;       parm4_ = # of 16-bit int's in MULTIPLIER (a1,...,an)
;       parm5_ = # of 16-bit int's in MULTIPLICAND(b1,...,bn)
;       parm6_ = temp for current multiplier
;
;
;  Then set up and do the first set of mults. Note that the # of
;    'bigloop' iterations correspond to the number of 16-bit digits
;   in the multiplier and the 'mlp' loop iterations correspond to the
;   # ints in the multiplicand. [these line up if you set tab=5].
;       Algorithm(32-bitx32-bit):
;                                               b2      b1
;                                               a2      a1
;                                               __________
;
;| first                                        p1h     p1l
;|iteration                     p2h     p2l
;|2nd                           p3h     p3l
;|iter                  p4h     p4l
;       ____________________________________
;                      c4       c3      c2      c1
;
;or---> Algorithm(64-bitx32-bit):
;                               b4      b3      b2      b1
;                                               a2      a1
;                               __________________
;| first                                        p1h     p1l
;|iteration                     p2h     p2l
;|                              p3h     p3l
;|                      p4h     p4l
;----
;|                                      p5h     p5l
;| 2nd                  p6h     p6l
;|iter          p7h     p7l
;|              p8h     p8l
;-----
;______________________________________
; 0     0       c6      c5      c4      c3      c2      c1
;
;or---> Algorithm(64-bitx64-bit):
;                                       b4      b3      b2      b1
;                                       a4      a3      a2      a1
;                                       __________________
;| first                                                p1h     p1l
;|iteration                             p2h     p2l
;|                                      p3h     p3l
;|                              p4h     p4l
;----
;|                                              p5h     p5l
;| 2nd                          p6h     p6l
;|iter                  p7h     p7l
;|                      p8h     p8l
;-----
;| third                                        p1h     p1l
;|iteration                     p2h     p2l
;|                              p3h     p3l
;|                      p4h     p4l
;----
;|                                      p5h     p5l
;| 4th                  p6h     p6l
;|iter          p7h     p7l
;|              p8h     p8l
;-----
;| fifth                        p9h     p9l
;|iteration     p10h    p10l
;|              p11h    p11l
;|      p12h    p12l
;----
;| 6th                  p13h    p13l
;|              p14h    p14l
;|      p15h    p15l
;|p16h p16l
;-----
;_________________________________________________________________
;  c8   c7      c6      c5      c4      c3      c2      c1
;
;       As each multiply is done, the partial product is added to
;  the destination 'dst'
;
;
;
        cproc   _MulUnsArrByUnsArr,,_mgmn
;
;  Schoolbook multiply of two arrays of 16-bit digits (least significant
;  digit first) into the 10-word array dst.
;
;  Working registers / storage:
;       di       -> dst word that lines up with the current multiplier
;                   digit (es:di in large data, ds-relative otherwise);
;                   advances 2 bytes per outer pass
;       si       -> least significant word of the multiplicand
;       bx        = byte offset of the current multiplicand digit
;       cx        = multiplicand digits left in the current pass
;       dx:ax     = 32-bit product delivered by MUL
;       wGMTemp1  = multiplier digits left (outer-loop counter, global)
;       tmp1 parm = current multiplier digit (parm9_ large / parm6_ small)
;
;  The argument slots for src1, src2, m and n are overwritten on the
;  stack (swapped and advanced); the arrays they point to are only read.
;  AX holds no meaningful value on return.
;  If the smaller digit count is zero or negative, dst is left all zero.
;
;  NOTE(review): SI and DI are modified here; whether cproc/cproce save
;  them is decided in prologue.h, which is not visible from this file.
;  NOTE(review): in the small-data build with DSNOTHING defined, DS is
;  reloaded below and never restored -- confirm that is intended.
;

;
;  Make the operand with the fewer digits the multiplier (parm1_).
;  On arrival at nochg, ax = digit count of the multiplier.
;
if      _LDATA
        push    ds
        push    es
        mov     ax,parm7_               ; ax = m
        cmp     ax,parm8_               ; compare with n (signed)
        jle     nochg                   ; m <= n: src1 already multiplier
        xchg    ax,parm8_               ; swap the two counts,
        mov     parm7_,ax               ;   ax = smaller count
        mov     bx,parm1_               ; swap the far pointers:
        xchg    bx,parm3_               ;   offset words first,
        mov     parm1_,bx
        mov     bx,parm2_               ;   then the segment words
        xchg    bx,parm4_
        mov     parm2_,bx
else
        mov     ax,parm4_               ; ax = m
        cmp     ax,parm5_               ; compare with n (signed)
        jle     nochg                   ; m <= n: src1 already multiplier
        xchg    ax,parm5_               ; swap the two counts,
        mov     parm4_,ax               ;   ax = smaller count
        mov     bx,parm1_               ; swap the near pointers
        xchg    bx,parm2_
        mov     parm1_,bx
endif

nochg:
ifdef   DSNOTHING
        mov     bx,seg wGMTemp1         ; make the global addressable
        mov     ds,bx
endif
        mov     wGMTemp1,ax             ; outer-loop counter
        mov     dx,ax                   ; keep count for the guard below
                                        ;  (dx survives the zero fill)

;
;  Zero all 10 words of dst and leave di at dst[0].
;
        cld                             ; string store must run upward
if      _LDATA
        les     di,parm5_               ; es:di -> dst
        xor     ax,ax
        mov     cx,10                   ; dst is unsigned[10]
        rep     stosw
        mov     di,parm5_               ; back to dst[0], es unchanged
else
        push    es                      ; save entry es
        mov     ax,ds
        mov     es,ax                   ; stosw stores through es:di
        mov     di,parm3_
        xor     ax,ax
        mov     cx,10                   ; dst is unsigned[10]
        rep     stosw
        mov     di,parm3_               ; back to dst[0]
        pop     es                      ; restore es
endif

;
;  Nothing to multiply when the smaller count is <= 0. Without this
;  check a zero count would wrap the DEC/LOOP counters and the loops
;  would run far past the end of dst.
;
        test    dx,dx
        jg      bigloop
        jmp     done

;
;  Outer loop: one pass per multiplier digit.
;
bigloop:
if      _LDATA
        mov     cx,parm8_               ; # digits in multiplicand
        lds     si,parm1_               ; ds:si -> current multiplier digit
        mov     ax,[si]
        mov     parm9_,ax               ; park it in the tmp1 slot
        lds     si,parm3_               ; ds:si -> lsd of multiplicand
else
        mov     cx,parm5_               ; # digits in multiplicand
        mov     si,parm1_               ; si -> current multiplier digit
        mov     ax,[si]
        mov     parm6_,ax               ; park it in the tmp1 slot
        mov     si,parm2_               ; si -> lsd of multiplicand
endif
        xor     bx,bx                   ; start at multiplicand digit 0

;
;  Inner loop: multiply every multiplicand digit by the current
;  multiplier digit and add the 32-bit product into dst at di+bx.
;
mlp:
if      _LDATA
        mov     ax,[si][bx]             ; next multiplicand digit
        mul     Word Ptr parm9_         ; dx:ax = digit * multiplier digit
        add     es:[di+bx],ax           ; low word into this column
        adc     es:[di+bx+2],dx         ; high word + carry into the next
else
        mov     ax,[si+bx]              ; next multiplicand digit
        mul     Word Ptr parm6_         ; dx:ax = digit * multiplier digit
        add     [di+bx],ax              ; low word into this column
        adc     [di+bx+2],dx            ; high word + carry into the next
endif
        jc      carry                   ; ripple a carry further up dst
nocary:
        add     bx,2                    ; next multiplicand digit
        loop    mlp

;
;  One multiplier digit finished: count it off. In large data DS was
;  pointing at the multiplicand, so it is switched to the segment of
;  wGMTemp1 for the decrement and then put back.
;
if   _LDATA
        mov     bx,ds                   ; save ds
        ifndef  DSNOTHING
        mov     ax, seg DGROUP
        else
        mov     ax,seg wGMTemp1
        endif
        mov     ds,ax
        dec     wGMTemp1                ; sets ZF when no digits remain
        mov     ds,bx                   ; MOV leaves ZF untouched
else
        dec     wGMTemp1                ; sets ZF when no digits remain
endif
        jz      done                    ; all multiplier digits used

        add     Word Ptr parm1_,2       ; advance to next multiplier digit
        add     di,2                    ; next pass lands one column higher
        jmp short bigloop               ; bigloop clears bx again

;
;  Carry out of the word at di+bx+2. Add the carry alone into the
;  following words until it is absorbed. Per the routine's precondition
;  (most significant bit of each operand clear) at most 4 further adds
;  are needed and the last one cannot carry out of dst.
;  di and bx are left unchanged.
;
carry:
        mov     ax,0000h                ; MOV, not XOR: CF must survive
if      _LDATA
        adc     es:[di+bx+4],ax
        jnc     nocary                  ; absorbed: back to the multiply
        adc     es:[di+bx+6],ax
        jnc     nocary
        adc     es:[di+bx+8],ax
        jnc     nocary
        adc     es:[di+bx+10],ax        ; last possible column
else
        adc     [di+bx+4],ax
        jnc     nocary                  ; absorbed: back to the multiply
        adc     [di+bx+6],ax
        jnc     nocary
        adc     [di+bx+8],ax
        jnc     nocary
        adc     [di+bx+10],ax           ; last possible column
endif
        jmp short nocary                ; no carry can remain here

done:
if      _LDATA
        pop     es                      ; restore entry segment registers
        pop     ds
endif
        cproce
        endps
        END
Patch level : no patch Files correlati : Ricompilazione Demo : [ ] Commento : Aggiunti i sorgenti per Greenleaf Math Library (gfm.dll) git-svn-id: svn://10.65.10.50/trunk@10079 c028cbd2-c16b-5b4b-a496-9718f37d4682 2002-02-26 12:19:02 +00:00			`; int _MulUnsArrByUnsArr(src1,src2,dst,m,n,tmp1)`
			`;`
			`; ARGUMENT`
			`; unsigned src1[m],src2[n]; where (m,n)<=5`
			`; unsigned *dst[10]; destination is unsigned [10]`
			`; int m,n; m=#ints in src1, n=#ints in src2`
			`; int tmp1; where tmp1 is used as a temp multiplier for each loop`
			`;`
			`; DESCRIPTION`
			`;`
			`; Multiplies multiplier by multiplicand giving dst. Src1 and scr2 are`
			`; 80-bitx80-bit is computed to 160-bit. The number of ints in src1`
			`; and src2 are examined to determine which is the multiplier, ie.`
			`; which has the fewest number of ints. If they have the same number`
			`; number of ints, then src1 will be the multiplier. As each int`
			`; is multiplied to obtain the partial product, it is added to the dst`
			`; and any carries are added to succeeding column locations in the`
			`; dst array.`
			`;`
			`; SIDE EFFECTS`
			`; Src1 and src2 remain unchanged. Dst IS ZEROED PRIOR TO ADD. NOTE THAT`
			`; NEITHER pSrc1 or pSrc2 CAN HAVE THE MSB OF THE HIGH-ORDER INT SET, ie.`
			`; neither number can be negative. If it is, the results are indeterminate.`
			`;`
			`; RETURNS`
			`; None.`
			`;`
			`; AUTHOR`
			`; Andy Anderson 04-Jun-88 1030`
			`; Copyright (C) 1987-90 Greenleaf Software Inc. All Rights Reserved.`
			`;`
			`; MODIFICATIONS`
			`;`
			`;`
			`.SFCOND`

			`include model.h`
			`include prologue.h`
			`include gm.equ`


			`; partial products for intermediate results`

			`ReferVar wGMTemp1,<cWord>`

			`dseg _gm`
			`endds`

			`pseg gmath`
			`;`
			`;`
			`; if large memory model then:`
			`;`
			`; parm1_ = ptr to multiplier`
			`; parm3_ = ptr to multiplicand`
			`; parm5_ = destination segment ptr } pointer to unsigned[10]`
			`; destination offset ptr } destination`
			`; parm7_ = # of 16-bit int's in multiplier`
			`; parm8_ = # of 16-bit int's in multiplicand`
			`; parm9_ = temp for current multiplier`
			`;`
			`; for if small model then`
			`; parm1_ = ptr to multiplier`
			`; parm2_ = ptr to multiplicand`
			`; parm3_ = ptr to unsigned[10] dst`
			`; parm4_ = # of 16-bit int's in MULTIPLIER (a1,...,an)`
			`; parm5_ = # of 16-bit int's in MULTIPLICAND(b1,...,bn)`
			`; parm6_ = temp for current multiplier`
			`;`
			`;`
			`; Then set up and do the first set of mults. Note that the # of`
			`; 'bigloop' iterations correspond to the number of 16-bit digits`
			`; in the multiplier and the 'mlp' loop iterations correspond to the`
			`; # ints in the multiplicand. [these line up if you set tab=5].`
			`; Algorithm(32-bitx32-bit):`
			`; b2 b1`
			`; a2 a1`
			`; __________`
			`;`
			`;\| first p1h p1l`
			`;\|iteration p2h p2l`
			`;\|2nd p3h p3l`
			`;\|iter p4h p4l`
			`; ____________________________________`
			`; c4 c3 c2 c1`
			`;`
			`;or---> Algorithm(64-bitx32-bit):`
			`; b4 b3 b2 b1`
			`; a2 a1`
			`; __________________`
			`;\| first p1h p1l`
			`;\|iteration p2h p2l`
			`;\| p3h p3l`
			`;\| p4h p4l`
			`;----`
			`;\| p5h p5l`
			`;\| 2nd p6h p6l`
			`;\|iter p7h p7l`
			`;\| p8h p8l`
			`;-----`
			`;______________________________________`
			`; 0 0 c6 c5 c4 c3 c2 c1`
			`;`
			`;or---> Algorithm(64-bitx64-bit):`
			`; b4 b3 b2 b1`
			`; a4 a3 a2 a1`
			`; __________________`
			`;\| first p1h p1l`
			`;\|iteration p2h p2l`
			`;\| p3h p3l`
			`;\| p4h p4l`
			`;----`
			`;\| p5h p5l`
			`;\| 2nd p6h p6l`
			`;\|iter p7h p7l`
			`;\| p8h p8l`
			`;-----`
			`;\| third p1h p1l`
			`;\|iteration p2h p2l`
			`;\| p3h p3l`
			`;\| p4h p4l`
			`;----`
			`;\| p5h p5l`
			`;\| 4th p6h p6l`
			`;\|iter p7h p7l`
			`;\| p8h p8l`
			`;-----`
			`;\| fifth p9h p9l`
			`;\|iteration p10h p10l`
			`;\| p11h p11l`
			`;\| p12h p12l`
			`;----`
			`;\| 6th p13h p13l`
			`;\| p14h p14l`
			`;\| p15h p15l`
			`;\|p16h p16l`
			`;-----`
			`;_________________________________________________________________`
			`; c8 c7 c6 c5 c4 c3 c2 c1`
			`;`
			`; As each multiply is done, the partial product is added to`
			`; the destination 'dst'`
			`;`
			`;`
			`;`
			`cproc _MulUnsArrByUnsArr,,_mgmn`

			`if _LDATA`
			`push ds`
			`push es`
			`mov ax,parm7_`
			`cmp ax,parm8_ ; see which is larger`
			`jle nochg ; m<n`
			`xchg ax,parm8_ ; exchange m,n and`
			`mov parm7_,ax ; # multiplier ints in ax`
			`mov bx,parm1_ ; the order of src1 and src2`
			`xchg bx,parm3_ ; so multiplier remains`
			`mov parm1_,bx ; as parm1_`
			`mov bx,parm2_`
			`xchg bx,parm4_`
			`mov parm2_,bx`
			`else`
			`mov ax,parm4_ ; number of digits in multiplier`
			`cmp ax,parm5_ ; see which is larger`
			`jle nochg ; m<n`
			`xchg ax,parm5_ ; exchange m,n and`
			`mov parm4_,ax ; # multiplier ints in ax`
			`mov bx,parm1_ ; the order of src1 and src2`
			`xchg bx,parm2_ ; so multiplier remains`
			`mov parm1_,bx`
			`endif`

			`nochg:`
			`ifdef DSNOTHING`
			`mov bx,seg wGMTemp1`
			`mov ds,bx`
			`endif`
			`mov wGMTemp1, ax ; save # reps in global`

			`; clear the destination`
			`if _LDATA`
			`les di,parm5_ ; assure zero dst`
			`add di,2 ; by concatenating zeroes`
			`lds si,parm5_ ; starting with the first`
			`sub ax,ax`
			`mov [si],ax`
			`mov cx,9`
			`rep movsw ; zero-fill dst`
			`les di,parm5_`
			`else`
			`push es ; save entry es`
			`mov ax,ds`
			`mov es,ax`
			`mov di,parm3_ ; set up to zero dst`
			`mov si,di`
			`add di,2`
			`sub ax,ax`
			`mov [si],ax`
			`mov cx,9`
			`rep movsw ; zero-fill dst`
			`mov di,parm3_ ; reset to start of dst`
			`pop es ; restore es`
			`endif`

			`;`
			`; Do only the number of multiplies required by the`
			`; number of 16-bit int's in the multiplier and multiplicand`
			`;`

			`bigloop:`
			`if _LDATA`
			`mov cx,parm8_ ; # digits in multiplicand`
			`lds si,parm1_ ; load si with multiplier S.A.`
			`mov ax,[si] ; get 1st(next) multiplier`
			`mov parm9_,ax ; to mult for partial products`
			`lds si,parm3_ ; ptr to lsd of multiplicand`
			`else`
			`mov cx,parm5_ ; # digits in multiplicand`
			`mov si,parm1_ ; load si with multiplier S.A.`
			`mov ax,[si] ; get the 1st(next) multiplier`
			`mov parm6_,ax ; to mult for partial products`
			`mov si,parm2_ ; ptr to lsd of multiplicand`
			`endif`

			`xor ax,ax ; clear flags`
			`xor bx,bx ; clear offset`
			`;`
			`; Then do the inner set of multiplies (each loop in the algorithm)`
			`;`
			`mlp:`

			`if _LDATA`
			`mov ax,[si][bx] ; starting at lsd, get next highest`
			`mul Word Ptr parm9_ ; 16-bit multiplicand & get product`
			`clc ; clear uns mult carry (value in dx)`
			`add es:[di+bx],ax ; add partial products to dst`
			`adc es:[di+bx+2],dx ; then if carry gets set from`
			`else`
			`mov ax,[si+bx] ; starting at lsd, get next highest`
			`mul Word Ptr parm6_ ; 16-bit multiplicand & get product`
			`clc ; clear uns mult carry (value in dx)`
			`add [di+bx],ax ; add partial products to dst`
			`adc [di+bx+2],dx ; then if carry gets set from`
			`endif`
			`mov ax,dx ; add msb's to next word`
			`jc carry ; here, must get special handling`
			`nocary:`
			`add bx,2 ; offsets`
			`loop mlp ; done yet??`
			`;`
			`; Yes. Now see if we've multiplied by all the multiplier`
			`; digits.`
			`;`
			`if _LDATA`
			`mov bx,ds ; save ds`
			`ifndef DSNOTHING`
			`mov ax, seg DGROUP`
			`else`
			`mov ax,seg wGMTemp1`
			`endif`
			`mov ds,ax`
			`dec wGMTemp1 ; see if we are done with the`
			`cmp wGMTemp1,0 ; outer loop`
			`mov ds,bx ;restore ds after compare`
			`else`
			`dec wGMTemp1 ; see if we are done with the`
			`cmp wGMTemp1,0 ; outer loop`
			`endif`
			`je done ; yes: done`

			`if _LDATA`
			`add Word Ptr parm1_,2 ; pts to next int in multiplier`
			`else`
			`add Word Ptr parm1_,2`
			`endif`
			`add di,2`
			`xor bx,bx ; clear the offset`
			`jmp short bigloop`
			`;`
			`; Here to propogate as many column carries as can happen. Since`
			`; we guarantee than non-negative numbers are passed, this means that`
			`; a max of 4 additional adds to the destination 'dst' could be made,`
			`; and that no carry can happen from the most significant word.`
			`; Therefore, we add a max of 4 times without changing di or bx.`
			`carry:`
			`mov ax,0000h ; we'll add only the carry`
			`if _LDATA`
			`adc es:[di+bx+4],ax`
			`jnc nocary ; if no carry, return to mult`
			`adc es:[di+bx+6],ax`
			`jnc nocary`
			`adc es:[di+bx+8],ax`
			`jnc nocary`
			`adc es:[di+bx+10],ax ; can't exceed dst bounds`
			`else`
			`adc [di+bx+4],ax`
			`jnc nocary ; if no carry, return to mult`
			`adc [di+bx+6],ax`
			`jnc nocary`
			`adc [di+bx+8],ax`
			`jnc nocary`
			`adc [di+bx+10],ax ; can't exceed dst bounds`
			`endif`
			`jmp short nocary ; &cant carry here`

			`done:`
			`if _LDATA`
			`pop es`
			`pop ds`
			`endif`
			`cproce`
			`endps`
			`END`