Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions src/crt/i48mulhs.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
assume adl=1

section .text

public __i48mulhs

; UDE:UHL = ((int96_t)UDE:UHL * (int96_t)UIY:UBC) >> 48
;
; Signed 48x48 high multiply, built on the unsigned routine:
;   mulhs(x, y) = mulhu(x, y) - (x < 0 ? y : 0) - (y < 0 ? x : 0)   (mod 2^48)
; with x = UDE:UHL and y = UIY:UBC.
;
; AF, UIY and UBC are pushed on entry and popped before returning.
; NOTE(review): the sign bits are carried in A across the calls to __i48mulhu
; and __i48sub, so both are assumed to return with A unchanged - confirm.
__i48mulhs:
push af
; saved y, restored by the final pop bc / pop iy
push iy
push bc
; copy of x, popped into UIY:UBC later as the second subtrahend
push de
push hl

; HL is used as scratch while the two sign bits are collected
push hl
lea hl, iy
add hl, hl ; carry = bit 23 of UIY = sign of y
sbc a, a ; A = $FF if y < 0, otherwise $00

ld hl, $800000
add hl, de ; carry = bit 23 of UDE = sign of x
pop hl ; pop leaves the carry untouched
rla ; A bit 0 = sign of x, A bit 1 = sign of y

call __i48mulhu

; if (UDE:UHL < 0) { result -= UIY:UBC; }
; UIY:UBC still holds y here; rrca moves the sign of x into carry and
; brings the sign of y down to bit 0
rrca
call c, __i48sub

; UIY:UBC = original x (the de/hl copy pushed on entry)
pop bc
pop iy

; if (UIY:UBC < 0) { result -= UDE:UHL; }
; the comment above refers to the original operands: subtract x when y < 0
rrca
call c, __i48sub

; restore the caller's UBC, UIY and AF
pop bc
pop iy
pop af
ret

extern __i48mulhu
extern __i48sub
75 changes: 75 additions & 0 deletions src/crt/i48mulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
assume adl=1

section .text

public __i48mulhu

; UDE:UHL = ((uint96_t)UDE:UHL * (uint96_t)UIY:UBC) >> 48
;
; Unsigned 48x48 high multiply, assembled from four 24x24 -> 48 products
; computed with __i48mulu (called with the upper halves of both operands zero).
;
; With x = x_hi:x_lo = UDE:UHL and y = y_hi:y_lo = UIY:UBC (24-bit halves):
;   p0  = x_lo * y_lo
;   mid = x_hi * y_lo + x_lo * y_hi              (49 bits: upper carry + 48)
;   result = x_hi * y_hi + mid_hi + (upper carry << 24)
;            + carry out of (mid_lo + p0_hi)     (low carry)
;
; The low carry is kept as a flag and folded into the final addition with
; adc. Incrementing mid_hi directly would drop a carry whenever mid_hi is
; $FFFFFF, because a 24-bit inc does not set any flag. For example
; x = $FFFFFF:$FFFFFF, y = $000002:$FFFFFF must give $000002:$FFFFFE.
;
; Preserved: UIX, UIY, UBC. Flags are destroyed. A is not modified by this
; routine itself (it is only pushed and popped).
; Relies on __i48mulu preserving UBC and UIY across the back-to-back calls.
; The size and cycle figures previously listed here are omitted; they have
; not been re-measured for this version.
__i48mulhu:
	push	ix
	push	iy
	push	bc
	ld	ix, 0
	lea	iy, ix		; UIY = 0: zero upper half for every product below
	add	ix, sp		; frame: (ix + 0) = y_lo, (ix + 3) = y_hi
	push	de		; (ix - 3) = x_hi
	push	hl		; (ix - 6) = x_lo

	; p0 = x_lo * y_lo
	lea	de, iy
	call	__i48mulu
	push	de		; p0_hi (the low 24 bits of p0 are not needed)

	; x_hi * y_lo
	lea	de, iy
	ld	hl, (ix - 3)
	call	__i48mulu
	push	de		; hi24
	push	hl		; lo24

	; x_lo * y_hi, summed with x_hi * y_lo to form mid
	lea	de, iy
	ld	bc, (ix + 3)
	ld	hl, (ix - 6)
	call	__i48mulu
	pop	bc		; lo24
	add	hl, bc
	ex	de, hl
	pop	bc		; hi24
	adc	hl, bc		; UHL = mid_hi, UDE = mid_lo, carry = upper carry

	pop	bc		; p0_hi (pop and ex keep the carry intact)
	push	af		; upper carry
	ex	de, hl		; UHL = mid_lo, UDE = mid_hi
	add	hl, bc		; only the carry out of mid_lo + p0_hi matters
	push	af		; low carry
	push	de		; mid_hi

	; x_hi * y_hi, plus everything carried up from below
	lea	de, iy
	ld	bc, (ix + 3)
	ld	hl, (ix - 3)
	call	__i48mulu
	pop	bc		; mid_hi
	pop	af		; carry = low carry
	adc	hl, bc		; low word of result; at most one carry out
	jr	nc, .no_mid_carry
	inc	de
.no_mid_carry:
	pop	af		; carry = upper carry
	jr	nc, .no_upper_carry
	inc	de
.no_upper_carry:
	; the true result fits in 48 bits, so UDE cannot wrap above
	ld	sp, ix
	pop	bc
	pop	iy
	pop	ix
	ret

extern __i48mulu
41 changes: 41 additions & 0 deletions src/crt/imulhs.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
assume adl=1

section .text

public __imulhs

; UHL = ((int48_t)UHL * (int48_t)UBC) >> 24
;
; Signed 24x24 high multiply, built on the unsigned routine:
;   mulhs(x, y) = mulhu(x, y) - (x < 0 ? y : 0) - (y < 0 ? x : 0)   (mod 2^24)
; with x = UHL and y = UBC.
;
; AF and UBC are pushed on entry and popped before returning.
; The sign bits are carried in A across the call; __imulhu pushes AF on entry
; and pops it before returning.
__imulhs:
push af
; saved y, popped as the second subtrahend and left in UBC on return
push bc
; copy of x, popped into UBC as the first subtrahend
push hl

; HL is used as scratch while the two sign bits are collected
push hl
add hl, hl ; carry = bit 23 of UHL = sign of x
rla ; A bit 0 = sign of x
ld hl, $800000
add hl, bc ; carry = bit 23 of UBC = sign of y
rla ; A bit 0 = sign of y, A bit 1 = sign of x
pop hl

call __imulhu

; if (UBC < 0) { result -= UHL; }
pop bc ; UBC = original x
; invert the sign bits so that the fall-through (negative) case reaches
; sbc with the carry clear; sbc would otherwise subtract one too many
cpl
rra ; carry = !(y < 0), A bit 0 = !(x < 0)
jr c, .positive_bc
sbc hl, bc
.positive_bc:

; if (UHL < 0) { result -= UBC; }
pop bc ; UBC = original y
rra ; carry = !(x < 0); overwrites any borrow left by the sbc above
jr c, .positive_hl
sbc hl, bc
.positive_hl:

pop af
ret

extern __imulhu
145 changes: 145 additions & 0 deletions src/crt/imulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
assume adl=1

section .text

public __imulhu

; UHL = ((uint48_t)UHL * (uint48_t)UBC) >> 24
;
; Unsigned 24x24 high multiply. The operands are split into bytes
; (a = UBC, b = UHL) and the 48-bit product is accumulated on the stack as
; sum[0..5], one byte column at a time, from 8x8 mlt partial products.
; The result is sum[3..5].
;
; UDE, AF, UIX, UIY and UBC are pushed on entry and popped before returning.
__imulhu:
; TODO: Optimize this routine as this is mostly just a copy paste of __i48mulu with some stuff removed.
;
; CC: 118*r(PC)+39*r(SPL)+38*w(SPL)+37
; CC: 117 bytes | 118F + 39R + 38W + 37
push de
; backup af
push af
push ix
ld ix, 0
add ix, sp

; Second copy of de: it only keeps the frame offsets below in place, its
; value is never used (de is scratch for the multiplications)
push de ; de will also be used to perform the actual multiplication
push hl
push iy
push bc

; bc = a[0], a[1]
ld a, l ; a = b[0]
ld iy, (ix - 5) ; iyl = b[1], iyh = b[2] (iyu is loaded too but never read)

; or a, a ; carry is already cleared (add ix, sp with ix = 0 cannot carry)
sbc hl, hl
push hl ; upper bytes of sum at -15
; Stack Use:
; ix-1 : deu (unused)
; ix-2 : d (unused)
; ix-3 : e (unused)
; ix-4 : hlu b[2]
; ix-5 : h b[1]
; ix-6 : l b[0]
; ix-7 : iyu (saved iy)
; ix-8 : iyh (saved iy)
; ix-9 : iyl (saved iy)
; ix-10 : bcu a[2]
; ix-11 : b a[1]
; ix-12 : c a[0]
; ix-13 : sum[5]
; ix-14 : sum[4]
; ix-15 : sum[3]
; ix-16 : sum[2]
; ix-17 : sum[1]
; ix-18 : sum[0]
;
; NOTE(review): every 24-bit "add hl, de" below assumes the upper byte of DE
; is zero after mlt - confirm against the eZ80 manual.

; ======================================================================
; sum[0-1]

; a[0]*b[0]
ld d, c ; d = a[0]
ld e, a ; e = b[0]
mlt de
push de ; lower bytes of sum at -18

; ======================================================================
; sum[1-2]
ld l, d ; hl will store current partial sum (hl was 0, so hl = sum[1])

; a[1]*b[0]
ld d, b ; d = a[1]
ld e, a ; e = b[0]
mlt de
add hl, de

; a[0]*b[1]
ld d, c ; d = a[0]
ld e, iyl ; e = b[1]
mlt de
add hl, de

ld (ix - 17), hl ; writes sum[1], sum[2] and the carry into sum[3]

; ======================================================================
; sum[2-3]
ld hl, (ix - 16) ; hl will store current partial sum

; a[0]*b[2]
ld d, c ; d = a[0]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; a[1]*b[1]
ld d, b ; d = a[1]
ld e, iyl ; e = b[1]
mlt de
add hl, de

; a[2]*b[0]
ld d, (ix - 10) ; d = a[2]
ld e, a ; e = b[0]
mlt de
add hl, de

ld (ix - 16), hl ; writes sum[2], sum[3] and the carry into sum[4]

; ======================================================================
; sum[3-4]
ld hl, (ix - 15) ; hl will store current partial sum

; a[1]*b[2]
ld d, b ; d = a[1]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; a[2]*b[1]
ld d, (ix - 10) ; d = a[2]
ld e, iyl ; e = b[1]
mlt de
add hl, de

ld (ix - 15), hl ; writes sum[3], sum[4] and the carry into sum[5]

; ======================================================================
; sum[4-5]
ld hl, (ix - 14) ; hl will store current partial sum (hlu = a[0], ignored)

; a[2]*b[2]
ld d, (ix - 10) ; d = a[2]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; byte stores: a 24-bit store here would overwrite the saved c at ix-12
ld (ix - 14), l
ld (ix - 13), h

; clean up stack and restore registers
pop de ; sum[0..2], discarded (de is restored below)
pop hl ; UHL = sum[3..5] = result
pop bc
pop iy

ld sp, ix ; drops the unused hl/de copies
pop ix
pop af
pop de
ret
36 changes: 36 additions & 0 deletions src/crt/llmulhs.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
assume adl=1

section .text

public __llmulhs

; BC:UDE:UHL = ((int128_t)BC:UDE:UHL * (int128_t)(SP64)) >> 64
;
; Signed 64x64 high multiply, built on the unsigned routine:
;   mulhs(x, y) = mulhu(x, y) - (x < 0 ? y : 0) - (y < 0 ? x : 0)   (mod 2^64)
; with x = BC:UDE:UHL and y = (SP64), the 64-bit operand on the stack.
;
; NOTE(review): __llmulhu and __llsub are presumed to take their second
; operand from the stack directly above their own return address (the same
; "(SP64)" convention as this routine) and to preserve IY - confirm.
__llmulhs:
push iy
ld iy, 0
add iy, sp ; frame: (iy + 3) = return address, (iy + 6)..(iy + 13) = y

; x is pushed so that it sits where a callee expects its stack operand:
; (iy - 9) = UHL, (iy - 6) = UDE, (iy - 3) = C, (iy - 2) = B
push bc, de, hl

; load y into the register operand
ld hl, (iy + 6)
ld de, (iy + 9)
ld bc, (iy + 12) ; only the low 16 bits (BC) belong to the operand

; argument order can be swapped since multiplication is commutative
call __llmulhu

; if ((SP64) < 0) { result -= BC:UDE:UHL; }
; (iy + 13) is the top byte of y; the pushed x is still on top of the stack,
; so this call subtracts the original x
bit 7, (iy + 13)
call nz, __llsub

; if (BC:UDE:UHL < 0) { result -= (SP64); }
; (iy - 2) is the saved B, the top byte of the original x
bit 7, (iy - 2)

; drop the pushed x; ld sp / pop leave the Z flag from the bit test intact
ld sp, iy

pop iy
ret z
; tail call: the stack is back to its entry layout, so __llsub sees y as its
; stack operand and returns straight to the caller
jp __llsub

extern __llmulhu
extern __llsub
Loading
Loading