campo-sirio/gfm/mgmn.asm

 ; int  _MulUnsArrByUnsArr(src1,src2,dst,m,n,tmp1)
 ;
 ; ARGUMENT
 ;      unsigned *src1[m],*src2[n]; where (m,n)<=5
 ;      unsigned *dst[10];      destination is unsigned [10]
 ;      int     m,n;    m=#ints in src1, n=#ints in src2
 ;      int     tmp1;  where tmp1 is used as a temp multiplier for each loop
 ;
 ; DESCRIPTION
 ;
 ;      Multiplies multiplier by multiplicand giving dst. Src1 and src2 are
 ;   each at most 80 bits (5 ints); an 80-bit x 80-bit product is computed
 ;   to 160 bits. The number of ints in src1
 ;   and src2 is examined to determine which is the multiplier, i.e.
 ;   which has the fewest ints. If they have the same
 ;   number of ints, then src1 will be the multiplier. As each int
 ;   is multiplied to obtain the partial product, it is added to the dst
 ;   and any carries are added to succeeding column locations in the
 ;   dst array.
 ;
 ; SIDE EFFECTS
 ;      Src1 and src2 remain unchanged. Dst IS ZEROED PRIOR TO ADD. NOTE THAT
 ;  NEITHER pSrc1 or pSrc2 CAN HAVE THE MSB OF THE HIGH-ORDER INT SET, ie.
 ;  neither number can be negative. If it is, the results are indeterminate.
 ;
 ; RETURNS
 ;      None.
 ;
 ; AUTHOR
 ;  Andy Anderson   04-Jun-88           1030
 ;   Copyright (C) 1987-90 Greenleaf Software Inc.  All Rights Reserved.
 ;
 ; MODIFICATIONS
 ;
 ;
        .SFCOND

        include model.h
        include prologue.h
        include gm.equ


;  Shared scratch word used by this routine. In this file wGMTemp1 holds
;  the number of multiplier ints still to be processed (the outer-loop
;  counter); it is stored once before the loops and decremented after
;  each pass over the multiplicand.
;  NOTE(review): ReferVar, dseg/endds and pseg are project macros from the
;  included headers; presumably ReferVar declares an external word -- confirm.

        ReferVar        wGMTemp1,<cWord>

;  No data is declared between dseg and endds in this file.
        dseg    _gm
        endds

;  Code segment for the routine below; the matching endps is at the end
;  of the file.
        pseg    gmath
;
;
;  if large memory model then:
;
;       parm1_ = ptr to multiplier
;       parm3_ = ptr to multiplicand
;       parm5_ = destination segment ptr } pointer to unsigned[10]
;                destination offset ptr  }  destination
;       parm7_ = # of 16-bit int's in multiplier
;       parm8_ = # of 16-bit int's in multiplicand
;       parm9_ = temp for current multiplier
;
;  if small memory model then:
;       parm1_ = ptr to multiplier
;       parm2_ = ptr to multiplicand
;       parm3_ = ptr to unsigned[10] dst
;       parm4_ = # of 16-bit int's in MULTIPLIER (a1,...,an)
;       parm5_ = # of 16-bit int's in MULTIPLICAND(b1,...,bn)
;       parm6_ = temp for current multiplier
;
;
;  Then set up and do the first set of mults. Note that the # of
;    'bigloop' iterations correspond to the number of 16-bit digits
;   in the multiplier and the 'mlp' loop iterations correspond to the
;   # ints in the multiplicand. [these line up if you set tab=5].
;       Algorithm(32-bitx32-bit):
;                                               b2      b1
;                                               a2      a1
;                                               __________
;
;| first                                        p1h     p1l
;|iteration                     p2h     p2l
;|2nd                           p3h     p3l
;|iter                  p4h     p4l
;       ____________________________________
;                      c4       c3      c2      c1
;
;or---> Algorithm(64-bitx32-bit):
;                               b4      b3      b2      b1
;                                               a2      a1
;                               __________________
;| first                                        p1h     p1l
;|iteration                     p2h     p2l
;|                              p3h     p3l
;|                      p4h     p4l
;----
;|                                      p5h     p5l
;| 2nd                  p6h     p6l
;|iter          p7h     p7l
;|              p8h     p8l
;-----
;______________________________________
; 0     0       c6      c5      c4      c3      c2      c1
;
;or---> Algorithm(64-bitx64-bit):
;                                       b4      b3      b2      b1
;                                       a4      a3      a2      a1
;                                       __________________
;| first                                                p1h     p1l
;|iteration                             p2h     p2l
;|                                      p3h     p3l
;|                              p4h     p4l
;----
;|                                              p5h     p5l
;| 2nd                          p6h     p6l
;|iter                  p7h     p7l
;|                      p8h     p8l
;-----
;| third                                        p1h     p1l
;|iteration                     p2h     p2l
;|                              p3h     p3l
;|                      p4h     p4l
;----
;|                                      p5h     p5l
;| 4th                  p6h     p6l
;|iter          p7h     p7l
;|              p8h     p8l
;-----
;| fifth                        p9h     p9l
;|iteration     p10h    p10l
;|              p11h    p11l
;|      p12h    p12l
;----
;| 6th                  p13h    p13l
;|              p14h    p14l
;|      p15h    p15l
;|p16h p16l
;-----
;_________________________________________________________________
;  c8   c7      c6      c5      c4      c3      c2      c1
;
;       As each multiply is done, the partial product is added to
;  the destination 'dst'
;
;
;
        cproc   _MulUnsArrByUnsArr,,_mgmn
;
;  Multiplies the unsigned word array src1[m] by src2[n] into dst[10].
;  Dst is always zeroed first. If the shorter operand has no digits
;  (min(m,n) <= 0) the routine returns with dst left at zero.
;  No meaningful value is returned in ax.
;
;  Working storage:
;       si (ds:si if _LDATA)  -> current multiplier int, then the
;                                least significant multiplicand int
;       di (es:di if _LDATA)  -> dst word lined up with the current
;                                multiplier int
;       bx                     = byte offset of the multiplicand int
;       cx                     = multiplicand ints left (inner loop)
;       dx:ax                  = 16x16-bit partial product
;       wGMTemp1               = multiplier ints left (outer loop)
;       parm9_ / parm6_ (tmp1) = current multiplier int
;
;  The pointer and count arguments on the stack are modified (swapped
;  and advanced); the arrays they point to are only read.
;

;
;       Make src1 the operand with the fewest ints (the multiplier).
;  On a tie src1 stays the multiplier. ax ends up = min(m,n).
;
if      _LDATA
        push    ds
        push    es
        mov     ax,parm7_
        cmp     ax,parm8_               ; m <= n: nothing to swap
        jle     nochg
        xchg    ax,parm8_               ; exchange m,n so that
        mov     parm7_,ax               ; ax = # multiplier ints
        mov     bx,parm1_               ; exchange both words of the
        xchg    bx,parm3_               ; src1 and src2 far pointers
        mov     parm1_,bx               ; so the multiplier remains
        mov     bx,parm2_               ; at parm1_
        xchg    bx,parm4_
        mov     parm2_,bx
else
        mov     ax,parm4_               ; number of ints in src1
        cmp     ax,parm5_               ; m <= n: nothing to swap
        jle     nochg
        xchg    ax,parm5_               ; exchange m,n so that
        mov     parm4_,ax               ; ax = # multiplier ints
        mov     bx,parm1_               ; exchange src1 and src2 so
        xchg    bx,parm2_               ; the multiplier remains at
        mov     parm1_,bx               ; parm1_
endif

nochg:
ifdef   DSNOTHING
        mov     bx,seg wGMTemp1
        mov     ds,bx
endif
        mov     wGMTemp1,ax             ; outer loop count = min(m,n)
        mov     dx,ax                   ; copy to test after the fill

;
;       Zero all 10 words of dst and leave di at its first word.
;
if      _LDATA
        les     di,parm5_               ; es:di -> dst
else
        push    es                      ; save entry es
        mov     ax,ds
        mov     es,ax                   ; stosw stores through es
        mov     di,parm3_
endif
        xor     ax,ax
        mov     cx,10                   ; dst is unsigned[10]
        cld                             ; fill upward
        rep     stosw
if      _LDATA
        les     di,parm5_               ; reset to start of dst
else
        mov     di,parm3_               ; reset to start of dst
        pop     es                      ; restore es
endif

;
;       An operand with no ints gives a zero product, which dst already
;  holds. Without this test the loop counters would wrap and the adds
;  would run far past the end of dst.
;
        or      dx,dx
        jg      bigloop
        jmp     done

;
;       Outer loop: one pass per multiplier int. Do only the number of
;  multiplies required by the number of 16-bit ints in the multiplier
;  and multiplicand.
;
bigloop:
if      _LDATA
        mov     cx,parm8_               ; # ints in multiplicand
        lds     si,parm1_               ; ds:si -> current multiplier int
        mov     ax,[si]
        mov     parm9_,ax               ; keep it for the inner loop
        lds     si,parm3_               ; ds:si -> lsd of multiplicand
else
        mov     cx,parm5_               ; # ints in multiplicand
        mov     si,parm1_               ; si -> current multiplier int
        mov     ax,[si]
        mov     parm6_,ax               ; keep it for the inner loop
        mov     si,parm2_               ; si -> lsd of multiplicand
endif
        xor     bx,bx                   ; start at the lsd
;
;       Inner loop: multiply each multiplicand int by the current
;  multiplier int and add the 32-bit product into dst at di+bx.
;
mlp:
if      _LDATA
        mov     ax,[si][bx]             ; next multiplicand int
        mul     Word Ptr parm9_         ; dx:ax = partial product
        add     es:[di+bx],ax           ; low word into dst
        adc     es:[di+bx+2],dx         ; high word plus carry
else
        mov     ax,[si+bx]              ; next multiplicand int
        mul     Word Ptr parm6_         ; dx:ax = partial product
        add     [di+bx],ax              ; low word into dst
        adc     [di+bx+2],dx            ; high word plus carry
endif
        jc      carry                   ; carry out of the high word
nocary:
        add     bx,2                    ; next multiplicand int
        loop    mlp
;
;       Pass finished. Count down the multiplier ints.
;
if      _LDATA
        mov     bx,ds                   ; ds is the multiplicand segment
        ifndef  DSNOTHING
        mov     ax, seg DGROUP
        else
        mov     ax,seg wGMTemp1
        endif
        mov     ds,ax                   ; address the counter
        dec     wGMTemp1                ; ZF set when none are left
        mov     ds,bx                   ; mov leaves the flags alone
else
        dec     wGMTemp1                ; ZF set when none are left
endif
        je      done

        add     Word Ptr parm1_,2       ; point to next multiplier int
        add     di,2                    ; next pass lands one word higher
        jmp short bigloop
;
;       Here to propagate as many column carries as can happen. Since
;  the caller guarantees that non-negative numbers are passed, a max of
;  4 additional adds to the destination 'dst' can be needed, and no
;  carry can happen from the most significant word. Therefore we add a
;  max of 4 times without changing di or bx.
;
carry:
        mov     ax,0                    ; must be mov: xor/sub would clear CF
if      _LDATA
        adc     es:[di+bx+4],ax
        jnc     nocary                  ; carry absorbed, back to the mults
        adc     es:[di+bx+6],ax
        jnc     nocary
        adc     es:[di+bx+8],ax
        jnc     nocary
        adc     es:[di+bx+10],ax        ; can't exceed dst bounds
else
        adc     [di+bx+4],ax
        jnc     nocary                  ; carry absorbed, back to the mults
        adc     [di+bx+6],ax
        jnc     nocary
        adc     [di+bx+8],ax
        jnc     nocary
        adc     [di+bx+10],ax           ; can't exceed dst bounds
endif
        jmp short nocary                ; no carry possible from here

done:
if      _LDATA
        pop     es
        pop     ds
endif
        cproce
        endps
        END