; GPU disassembly - Jaguar cartridge Verification
; disassembly by Tursi - 15 Feb 09
;
; To my knowledge there is no existing disassembly of
; the GPU boot procedure, so here's mine. There is an
; object file which I used for some labels. I'm not aware
; of any disassembly or object file of the encrypted header 
; existing. (Well, it turns out there are some out there
; after all... someday we'll use them to improve this one ;) )
; Look for RSAM.DAS/RSAM.GAS
;
; Much help by the Virtual Jaguar source code, KSkunk's
; knowledge of the machine, and the Jag Tech Ref manual.
; Most of the labels come from the boot object file found on
; the AtariHQ CD. Equates and buffers (except bootII) I
; had to name so they may be named misleadingly.
;
; This code is copied into GPU RAM by the 68k at startup.
; The 68k then goes into a loop waiting for the GPU to
; stop. When the GPU stops, the 68k looks for '0x03d0dead'
; in the first address of GPU memory. If it finds that,
; then it proceeds to run the cartridge by branching to
; the vector in the cartridge header.
;
; The JagCD makes some assumptions when it boots.
; It decrypts the header into the DSP instead of the GPU,
; then makes a handful of hard-coded fixups to addresses,
; using a table in its BIOS. Finally, it does the MD5 check
; and swap itself, and then skips that part of the encrypted boot.
; Note the code below doesn't include the JagCD part
; of the decryption but I'll try to note the parts it alters.
;
; The encrypted code contains the startup functionality
; and the MD5 check for the ROM. This code works by
; decrypting the number of blocks specified in the first
; byte of the header (in two's complement) to 'bootII',
; then branching to it. There is limited checksumming to
; ensure that the blocks are probably valid. This checksum
; is also encrypted so is non-trivial to modify. Each block
; takes about 500ms to decode. In a normal Jaguar cartridge,
; with or without the universal header, there are 10
; blocks to decrypt, so boot takes 5 seconds.
;
; The only way to change this would be to use a different
; encrypted header.
;

; decryption equates
cartbase	equ $00800000	; first address of cartridge rom (RSAgpu reads the block count byte from here)
number		equ 68			; number of bytes in a block, rounded up to 32 bits (17 dwords). The true count is 65.
magic1		equ $ABCDEFFF	; loaded into R4 when every block passed its check; bit 31 is set, which is what the JR n test after cartclr looks for
magic2		equ $12345678	; loaded into R4 at cartbad; bit 31 is clear, so the GPU halts instead of running bootII
swapaddr1 	equ	$00F03668	; first address in decrypted code to swap (only relevant on Atari header)
swapaddr2	equ	$00F035D0	; second address in decrypted code to swap (only relevant on Atari header)

; md5 equates from cart header
; NOTE(review): none of these are referenced in this part of the listing;
; presumably they are used by the decrypted MD5 code further down - confirm.
md5first	equ $008002C0
md5last		equ $00A00000		; 2MB, or $00C00000 on a 4MB cart
; md5hash0..3 match the standard MD5 initial chaining values A, B, C, D.
md5hash0	equ $67452301
md5hash1	equ $EFCDAB89
md5hash2	equ $98BADCFE
md5hash3	equ $10325476

; Start of GPU RAM. The key-handling loops (getkey/shiftlp/copkey) load the
; key as dwords starting at G_RAM rather than at 'public', so these three
; zero bytes form the top of the first key dword ($0000002F). They therefore
; appear to be deliberate padding that right-aligns the 65-byte key in
; 68 bytes (17 dwords), not an emulator artefact.
G_RAM:
	00f03000: dc.b 0,0,0	; leading pad bytes of the 68-byte key image (see note above)
	
; Public key (65 bytes, most significant byte first when read as dwords from G_RAM)
public:
	00f03003: dc.b $2f,$c5,$0f,$79,$b7,$96,$1b,$10
	00f0300b: dc.b $a2,$ea,$46,$ab,$a1,$f0,$1d,$af
	00f03013: dc.b $c5,$c7,$94,$c0,$08,$b9,$81,$80
	00f0301b: dc.b $5e,$5b,$93,$f5,$03,$02,$41,$fe
	00f03023: dc.b $75,$b7,$1c,$e8,$e7,$22,$79,$a3
	00f0302b: dc.b $d5,$be,$30,$45,$f9,$ea,$35,$d9
	00f03033: dc.b $8a,$0a,$15,$40,$b4,$b4,$e8,$4e
	00f0303b: dc.b $a6,$dd,$17,$ee,$42,$33,$10,$0d
	00f03043: dc.b $f9
; End of public key
	
; Buffer used as temp in decryption.
; 204 bytes (3 * 68). Receives the product of the first multiply pass
; (gpubuf * gpubuf); from cubedone onwards it is reused for the copy of
; the public key that the reduction loop works with.
decbuf:
	00f03044: bss 68*3

; Buffer used as temp in decryption.
; 204 bytes (3 * 68). Receives the product of the second multiply pass
; (gpubuf * decbuf).
cubbuf:
	00f03110: bss 68*3
	
; Buffer used as temp in decryption.
; 204 bytes (3 * 68). Filled from cubbuf by altcopy and used as the
; alternate working buffer by the subtract loop.
keybuf:	
	00f031dc: bss 68*3

; Buffer used as temp in decryption	(holds byteswapped data from cart)
; 68 bytes = one block, stored as 17 dwords by loadsig/swapit4.
gpubuf:
	00f032a8: bss 68
	
; This is the main entry point to the GPU cart
; verification code.
;
; Register setup performed here (as read from the instructions below):
;   R29 = address of blkloop (target of the per-block JUMP)
;   R00 = cartridge read pointer, starts at cartbase
;   R27 = number (68), bytes handled per block
;   R26 = gpubuf, destination of the raw cartridge block
; Three values are parked in the other register bank with MOVETA and
; fetched again with MOVEFA at shiftdon:
;   other-bank R02 = block count byte read from the first cartridge byte
;   other-bank R01 = startcode, the output pointer for decrypted data
;   other-bank R00 = 0, the running byte sum carried between blocks
RSAgpu:
	00F032EC: MOVEI   blkloop ($00F03314),R29    (981D)		; address of main block decryption loop
	00F032F2: MOVEI   cartbase ($00800000),R00    (9800)	; address in cartridge being read
	00F032F8: MOVEI   number (68),R27    (981B)				; number of bytes in a block
	00F032FE: MOVEI   gpubuf ($00F032A8),R26    (981A)		; small buffer in GPU RAM to read cart into
	00F03304: LOADB   (R00),R13        (9C0D)				; Read block count from cartridge (first byte of the header)
	00F03306: MOVETA  R13,R02          (91A2)				; Store in R2 of the other register bank
	00F03308: MOVEI   startcode ($00F035A8),R13    (980D)	; Load address of entry point to decrypted code
	00F0330E: MOVETA  R13,R01          (91A1)				; Store in R1 of the other register bank
	00F03310: MOVEQ   0,R13            (8C0D)				; Get a zero
	00F03312: MOVETA  R13,R00          (91A0)				; Store in R0 of the other register bank (initial running sum)

; Top of loop for decrypting a block.
; Reads 68 bytes from the cartridge (R0 is pre-incremented, so the very first
; byte fetched is cartbase+1; cartbase+0 is the block count read in RSAgpu)
; and stores them in gpubuf as 17 dwords, filling the buffer from its last
; dword down to its first.
blkloop:
	00F03314: MOVEI   $000000FF,R23    (9817)				; Get $FF in R23
	00F0331A: MOVE    R26,R14          (8B4E)				; Get address of GPU buffer in R14
	00F0331C: MOVE    R27,R01          (8B61)				; Get number of bytes to read into R1

loadsig:
	00F0331E: MOVEQ   0,R13            (8C0D)				; Clr R13 (temp)
	00F03320: MOVEQ   4,R02            (8C82)				; Load '4' into R2 (index for 4 bytes)

swapit4:
; read in 32-bits, one byte at a time, byte order 4321
; (each byte is ORed into the low byte and the register is then rotated right
; by 8, so after four passes the first byte read sits in bits 7..0)
	00F03322: ADDQ    $1,R00           (0820)				; Increment cartridge address
	00F03324: LOADB   (R00),R16        (9C10)				; Read next byte from cart
	00F03326: OR      R16,R13          (2A0D)				; merge into R13 temp
	00F03328: SUBQ    $1,R02           (1822)				; decrement R2 index 
	00F0332A: JR      nz,swapit4 (00F03322)      (D761)		; not zero, keep reading
	00F0332C: RORQ    $8,R13           (750D)				; delay slot, rotate right by 8 bits
	
	00F0332E: SUBQ    $4,R01           (1881)				; decrement count of bytes read by 4
	00F03330: JR      nz,loadsig (00F0331E)      (D6C1)		; not finished, read the next 32-bits
	00F03332: STORE   R13,(R14+R01)    (F02D)				; delay slot, save the dword into the GPU buffer at the already-decremented offset (note reverse order: first dword lands at gpubuf+64, last at gpubuf+0)
	
; One block read into the gpubuffer	- prepare to decrypt. But first,
; we take the last 32 bit block read and strip it down to one byte.
; this is because a block is 65 bytes, but our loop reads 68.
; The rest of this section loads the loop addresses and pointers used by
; the multiply that follows (multlp / plierlp / candlp).
	00F03334: AND     R23,R13          (26ED)				; get least significant byte from last word read (R23 = $FF)
	00F03336: STORE   R13,(R14)        (BDCD)				; save back into the gpubuffer at offset 0 (overwrites last write)
	00F03338: SUBQ    $3,R00           (1860)				; back up cartridge address 3 bytes (68 read, 65 used)
	00F0333A: RORQ    $8,R23           (7517)				; rotate mask to most significant byte ($ff000000)
	00F0333C: SHARQ   $8,R23           (6D17)				; shift arithmetic (should give $ffff0000)
	00F0333E: MOVE    R23,R22          (8AF6)				; copy mask
	00F03340: NOT     R22              (3016)				; invert (should give $0000ffff)
	00F03342: MOVEI   plierlp ($00F03378),R20    (9814)		; store address of plierlp in R20
	00F03348: MOVEI   candlp ($00F03384),R19    (9813)		; store address of candlp in R19
	00F0334E: MOVEI   multlp ($00F0336A),R28    (981C)		; store address of multlp in R28
	00F03354: MOVEQ   0,R18            (8C12)				; get a zero into R18
	00F03356: MOVE    R26,R04          (8B44)				; get address of GPU buffer into R4
	00F03358: ADD     R27,R04          (0364)				; add number of bytes to process (68), R4 = end of gpubuf
	00F0335A: MOVE    R04,R07          (8887)				; copy end of buffer address to R7
	00F0335C: MOVE    R26,R03          (8B43)				; get address of GPU buffer into R3
	00F0335E: MOVE    R26,R06          (8B46)				; ... and R6
	00F03360: MOVEI   decbuf ($00F03044),R12    (980C)		; Get another buffer into R12
	00F03366: MOVE    R27,R13          (8B6D)				; copy number of bytes to process into R13 (68)
	00F03368: SHARQ   $1,R13           (6C2D)				; halve it: 34, the number of dwords clearpro zeroes (34 * 4 = 136 bytes = 2 * 68)

; Multi-precision multiply, outer part.
; This code is entered twice per block: first straight from the setup above
; (gpubuf * gpubuf -> decbuf), then again via JUMP (R28) from the block just
; before cubedone (gpubuf * decbuf -> cubbuf). Together that yields the cube
; of the cartridge block, which is presumably the RSA step with exponent 3
; before the reduction by the public key - confirm.
; On entry: R13 = number of dwords to clear, R12 = start of result buffer,
;           R07 = end of multiplicand buffer, R04 = end of gpubuf.
multlp:
	00F0336A: MOVE    R07,R08          (88E8)				; copy end of multiplicand buffer to R8
	00F0336C: SUBQ    $1,R13           (182D)				; Decrement dword count (sets the flags clearpro tests)

; This block zeros out the result buffer (decbuf on the first pass, cubbuf on the second)
clearpro:
	00F0336E: STORE   R18,(R12)        (BD92)				; Write R18 (0) to result buffer ptr R12
	00F03370: ADDQT   $4,R12           (0C8C)				; add 4 to R12 (don't change flags!)
	00F03372: JR      nz,clearpro (00F0336E)      (D7A1)	; (flags set by subq!) loop if not zero
	00F03374: SUBQ    $1,R13           (182D)				; delay slot - decrement dword count
	
	00F03376: MOVE    R12,R21          (8995)				; store end of result buffer in R21

; Outer loop: pick the next multiplier dword, working from the end of gpubuf
; back towards its start.
plierlp:
	00F03378: SUBQ    $4,R04           (1884)				; subtract 4 bytes from end of GPUbuf pointer
	00F0337A: MOVE    R12,R11          (898B)				; Copy result write position (before it is stepped back)
	00F0337C: LOAD    (R04),R05        (A485)				; Read dword from gpubuf into R5
	00F0337E: SUBQ    $4,R12           (188C)				; Decrement result pointer by 4 for the next outer pass
	00F03380: MOVE    R05,R01          (88A1)				; copy read word to R1
	00F03382: RORQ    $10,R01          (7601)				; swap high and low words ($10 = 16 bits)

; Inner loop of the multi-precision multiply.
; R5 / R1 hold the current multiplier dword and its word-swapped copy (loaded
; in plierlp). This loop walks the multiplicand from its last dword (R8) back
; to its first (R6), builds the 64-bit product from four 16x16 MULTs, and adds
; it into the result buffer at R11, pushing any carry further up the buffer.
; The data is processed first from gpubuf->decbuf, then again from decbuf->cubbuf.
; NOTE(review): earlier notes flagged the buffer-pointer comments in this
; section as unreliable. The pointer roles below were re-derived from the
; instructions but this section still deserves a careful second read.
candlp:
	00F03384: SUBQ    $4,R08           (1888)				; step multiplicand pointer back by 4
	00F03386: LOAD    (R08),R09        (A509)				; read the dword there into R9
	00F03388: MOVE    R09,R10          (892A)				; Copy data to R10
	00F0338A: MOVE    R09,R13          (892D)				; and R13
	00F0338C: RORQ    $10,R10          (760A)				; word swap the read data
	00F0338E: MULT    R05,R13          (40AD)				; 16 bit multiply of the low word read in plierlp and the low word read here
	00F03390: MOVE    R10,R16          (8950)				; copy the word swapped version of this data to R16
	00F03392: MULT    R01,R09          (4029)				; 16 bit multiply of the high word read in plierlp and the low word read here
	00F03394: MULT    R05,R16          (40B0)				; ..low word in plierlp and high word here
	00F03396: MULT    R01,R10          (402A)				; ..high word in plierlp and high word here
; So far: 
; R13 = outer.low * inner.low
; R09 = outer.high * inner.low
; R16 = outer.low * inner.high
; R10 = outer.high * inner.high
	00F03398: ADD     R16,R09          (0209)				; add ol.ih and oh.il (the two middle partial products)
	00F0339A: MOVEQ   0,R16            (8C10)				; zero r16
	00F0339C: ADDC    R16,R16          (0610)				; trick - set R16 to 1 if the previous add caused a carry
	00F0339E: RORQ    $10,R09          (7609)				; word swap sum result
	00F033A0: RORQ    $10,R16          (7610)				; word swap carry to match (carry now in bit 16)
	00F033A2: MOVE    R09,R17          (8931)				; store the value in R17
	00F033A4: AND     R22,R17          (26D1)				; keep the low word ($0000ffff), i.e. the upper 16 bits of the middle sum
	00F033A6: AND     R23,R09          (26E9)				; keep the high word ($ffff0000), i.e. the lower 16 bits of the middle sum shifted up
	00F033A8: OR      R17,R16          (2A30)				; OR R17 into R16, giving the carry in high word and part of the sum in the low word
	00F033AA: SUBQ    $4,R11           (188B)				; decrement by four pointer into result buffer
	00F033AC: ADD     R09,R13          (012D)				; add R9 (high word) into R13 (ol*il): low dword of the product
	00F033AE: LOAD    (R11),R09        (A569)				; Get the current value from result buffer (blank first pass) into R9
	00F033B0: ADDC    R10,R16          (0550)				; Adds R10 (oh*ih) into R16, including previous addition's carry: high dword of the product
	00F033B2: ADD     R13,R09          (01A9)				; Add R13 (created above) into R9 (read from result buffer)
	00F033B4: MOVE    R11,R10          (896A)				; Save R11 pointer in R10
	00F033B6: STORE   R09,(R11)        (BD69)				; store new value of R9 back into result buffer
	00F033B8: SUBQT   $4,R10           (1C8A)				; decrement R10 pointer by 4 (no flag change, carry is preserved)
	00F033BA: LOAD    (R10),R13        (A54D)				; Read next higher dword of the result into R13
	00F033BC: ADDC    R16,R13          (060D)				; Add R16 (created above) plus carry into R13
	00F033BE: JR      nc,noCYout (00F033CC)      (D4C4)		; jump ahead if there was no carry
	00F033C0: STORE   R13,(R10)        (BD4D)				; Delay slot - write new R13 back to result buffer

; if the last add generated a carry...
CYlp:
	00F033C2: SUBQT   $4,R10           (1C8A)				; decrement result pointer by 4 again
	00F033C4: LOAD    (R10),R13        (A54D)				; read the value there
	00F033C6: ADDC    R18,R13          (064D)				; trick (R18==0) to add the carry to the value
	00F033C8: JR      c,CYlp (00F033C2)       (D788)		; If this operation caused a carry, loop around again
	00F033CA: STORE   R13,(R10)        (BD4D)				; delay slot, write new R13 back to result buffer

; When we are done passing along the carry, continue here	
noCYout:
	00F033CC: CMP     R08,R06          (7906)				; Check if we are at the beginning of the multiplicand buffer (last dword)
	00F033CE: JUMP    nz,(R19)         (D261)				; if not, loop back to candlp to do the next dword
	00F033D0: CMP     R04,R03          (7883)				; delay slot, compare multiplier pointer R4 against start of gpubuf (R3)
															; maybe pointless if we jump, but used for the jump below
	00F033D2: MOVE    R07,R08          (88E8)				; copy end of multiplicand buffer back into the work pointer
	00F033D4: JUMP    nz,(R20)         (D281)				; If the outer loop is not finished, then branch back to plierlp
	00F033D6: CMP     R06,R26          (78DA)				; delay slot, set up compare between r6 and r26, matches first time (r6=gpubuf), fails second (r6=decbuf)
															; again, not used when we branch, only if we don't
; this block basically lets us repeat the above code, but from decbuf to cubbuf,
; instead of from gpubuf to decbuf.
; It tests the CMP R06,R26 left in the delay slot above: equal means only the
; first pass (the square) has run, so set up the second pass; not equal means
; the cube is complete and we leave for cubedone.
	00F033D8: MOVE    R27,R13          (8B6D)				; copy byte count (68) back into R13
	00F033DA: JR      nz,cubedone (00F033F6)      (D5A1)	; if compare above was not equal, jump out of loop
	00F033DC: ADD     R27,R04          (0364)				; delay slot, add byte count to R4 to push the multiplier pointer back to the end of gpubuf
	00F033DE: MOVEI   decbuf ($00F03044),R06    (9806)		; load decbuf pointer into R6 (changes the compare above)
	00F033E4: MOVEI   cubbuf ($00F03110),R12    (980C)		; get a third buffer into R12 for work
	00F033EA: MOVE    R06,R07          (88C7)				; copy address of decbuf into r7
	00F033EC: ADD     R27,R13          (036D)				; R13 = 136, size in bytes of the product now in decbuf
	00F033EE: ADD     R13,R07          (01A7)				; add r13 into r7 to get end of the data in decbuf (decbuf+136)
	00F033F0: ADD     R27,R13          (036D)				; R13 = 204 (3 * 68), size in bytes of the result of this pass
	00F033F2: JUMP    (R28)            (D380)				; branch back to multlp to do it one more time!
	00F033F4: SHARQ   $2,R13           (6C4D)				; delay slot - divide R13 by 4 to get dword count (51) for clearpro

; now we're done the multiply passes, work with the public key.
; msbcheck skips the leading all-zero dwords of the result in cubbuf
; (zeroing the matching dwords of decbuf as it goes). The code after it
; derives the number of reduction steps (R6, in bits) and moves the three
; buffer pointers forward to the first non-zero dword.
cubedone:
	00F033F6: MOVEI   cubbuf ($00F03110),R03    (9803)		; get address of cubbuf into r3
	00F033FC: MOVEI   decbuf ($00F03044),R04    (9804)		; get address of decbuf into r4
	00F03402: MOVEI   keybuf ($00F031DC),R08    (9808)		; get address of keybuf into r8
	00F03408: MOVE    R03,R14          (886E)				; cache cubbuf in r14
	00F0340A: MOVE    R04,R15          (888F)				; cache decbuf in r15
	00F0340C: MOVEQ   0,R01            (8C01)				; load zero into r1 for indexing

msbcheck:
	00F0340E: LOAD    (R14+R01),R13    (E82D)				; get data from cubbuf into r13
	00F03410: STORE   R18,(R15+R01)    (F432)				; write a zero into the corresponding part of decbuf
	00F03412: OR      R13,R13          (29AD)				; test the read data
	00F03414: JR      z,msbcheck (00F0340E)       (D782)	; if we read zero, loop around again
	00F03416: ADDQ    $4,R01           (0881)				; delay slot - add 4 to index in r1 (also runs on the final pass)
	
	00F03418: MOVE    R27,R06          (8B66)				; load the byte count into r6
	00F0341A: SUBQ    $4,R01           (1881)				; undo the extra increment: R1 = offset of the first non-zero dword
	00F0341C: ADD     R27,R06          (0366)				; add byte count again to R6 (now 136)
	00F0341E: SUB     R01,R06          (1026)				; subtract the offset of the first non-zero dword
	00F03420: SHLQ    $3,R06           (63A6)				; multiply by 8: R6 = (136 - offset) * 8, the bit count used as the loop counter at skiprtm
	00F03422: MOVEI   G_RAM ($00F03000),R14    (980E)		; load R14 with first address of GPU RAM (start of the padded public key)
	00F03428: ADD     R01,R04          (0024)				; advance R4 (decbuf) by the offset of the first non-zero dword
	00F0342A: ADD     R01,R03          (0023)				; advance R3 (cubbuf) by the same offset
	00F0342C: ADD     R01,R08          (0028)				; advance R8 (keybuf) by the same offset
	00F0342E: MOVE    R04,R15          (888F)				; copy the advanced decbuf address to R15 
	00F03430: SUBQ    $4,R15           (188F)				; back R15 up by 4, because the key loops store at (R15+R01) after R1 has been incremented
	00F03432: MOVEQ   0,R16            (8C10)				; load 0 in R16 for the bit counter in msbit

; Find the position of the top set bit in the first non-zero dword of the
; cube (still in R13 from msbcheck), then copy the public key into decbuf
; shifted so that its top bit lines up with it.
; 26 is the number of leading zero bits in the first key dword ($0000002F).
; NOTE(review): the shift direction below relies on SH / ROR treating a
; negative count as "left"; confirm against the Jaguar GPU instruction
; reference.
msbit:
	00F03434: ADD     R13,R13          (01AD)				; shift R13 left by one bit (top bit goes to carry)
	00F03436: JR      nc,msbit (00F03434)      (D7C4)		; loop until it produces a carry
	00F03438: ADDQ    $1,R16           (0830)				; delay slot - count each pass (final pass included, so R16 = leading zero bits + 1)
	
	00F0343A: MOVEQ   26,R07           (8F47)				; load R7 with '26'
	00F0343C: MOVEI   noshift ($00F0347C),R23    (9817)		; load address of noshift to R23
	00F03442: SUB     R16,R07          (1207)				; R7 = 26 - R16
	00F03444: JUMP    c,(R23)          (D2E8)				; if that borrowed (R16 > 26), jump to noshift (carry is the borrow, as in the CMP / JR c loop at cartclr)
	00F03446: ADDQ    $1,R07           (0827)				; delay slot, R7 = 27 - R16 = number of bits the key must move (at least 1 on this path)
	00F03448: ADD     R07,R06          (00E6)				; add the shift amount to the bit count in R6
	00F0344A: NEG     R07              (2007)				; negate the shift amount (R7 is now negative)
	00F0344C: MOVEQ   0,R23            (8C17)				; load 0 into R23
	00F0344E: NOT     R23              (3017)				; and invert it to get $ffffffff
	00F03450: SH      R07,R23          (5CF7)				; shift by r7 bits (negative count, so presumably a left shift), giving a mask of the bits that stay in place
	00F03452: MOVE    R23,R22          (8AF6)				; save mask in r22
	00F03454: NOT     R22              (3016)				; invert R22 to get the opposite mask (the bits that wrap around)
	00F03456: MOVEQ   0,R01            (8C01)				; load 0 into R1 for index

getkey:
	00F03458: LOAD    (R14+R01),R16    (E830)				; load key from beginning of G_RAM into R16
	00F0345A: ROR     R07,R16          (70F0)				; rotate by r7 bits

shiftlp:
	00F0345C: ADDQ    $4,R01           (0881)				; increment r1 index by 4
	00F0345E: LOAD    (R14+R01),R17    (E831)				; load next dword of key into R17
	00F03460: ROR     R07,R17          (70F1)				; rotate this data by R7 bits, too
	00F03462: MOVE    R17,R13          (8A2D)				; save data in R13
	00F03464: AND     R22,R17          (26D1)				; keep the bits that wrapped around (they belong to the previous dword)
	00F03466: AND     R23,R13          (26ED)				; keep the remaining bits (they start the next output dword)
	00F03468: OR      R17,R16          (2A30)				; merge the wrapped bits into the previous dword
	00F0346A: CMP     R01,R27          (783B)				; compare index against max byte count (68)
	00F0346C: STORE   R16,(R15+R01)    (F430)				; save resulting data into decbuf
	00F0346E: JR      nz,shiftlp (00F0345C)      (D6C1)		; if still bytes to process, loop
	00F03470: MOVE    R13,R16          (89B0)				; delay slot, carry the remaining bits forward in r16
	
	00F03472: OR      R16,R16          (2A10)				; done loop, test r16
	00F03474: JR      z,finish0 (00F03488)       (D522)		; if zero, jump to finish0 without storing
	00F03476: NOP                      (E400)				; delay slot
	
	00F03478: JR      finish0 (00F03488)         (D4E0)		; otherwise jump to finish0 as well, but...
	00F0347A: STORE   R16,(R15+R01)    (F430)				; delay slot, ...save the leftover bits into decbuf first

; This block is run when the SUB at msbit+$E borrowed, i.e. the bit counter
; in R16 was greater than 26, so the key is copied without any bit shifting.
; Copies 17 dwords (68 bytes) of key from G_RAM (R14) into decbuf via R15.
; R15 was set 4 bytes below the target, and the store uses the
; already-incremented index, so the first dword lands at R15+4.
noshift:
	00F0347C: MOVEQ   0,R01            (8C01)				; load zero into R1 for index

copkey:
	00F0347E: LOAD    (R14+R01),R13    (E82D)				; load dword from key into R13
	00F03480: ADDQ    $4,R01           (0881)				; increment index by 4
	00F03482: CMP     R27,R01          (7B61)				; check if we have processed 68 bytes
	00F03484: JR      nz,copkey (00F0347E)      (D781)		; if not yet, loop
	00F03486: STORE   R13,(R15+R01)    (F42D)				; delay slot, write data to decbuf (runs on the final pass too)

; At this point, decbuf either has the raw public key, or a shifted copy of the key.
; R1 is 68 on both paths into here, and R15 is still 4 bytes below R4.
; This section records where the key copy ends (R5), copies part of cubbuf
; into keybuf, and sets up the pointers and loop addresses for divloop.
finish0:
	00F03488: MOVE    R15,R05          (89E5)				; copy key store base (R4 - 4) to R5
	00F0348A: MOVE    R03,R14          (886E)				; copy advanced cubbuf pointer (first non-zero dword) to R14
	00F0348C: ADD     R01,R05          (0025)				; add index: R5 = address of the last key dword written to decbuf
	00F0348E: MOVE    R08,R15          (890F)				; copy advanced keybuf pointer to r15
	00F03490: ADD     R27,R14          (036E)				; add '68' to get further into cubbuf
	00F03492: MOVE    R27,R01          (8B61)				; copy 68 into R1 for index
	00F03494: ADD     R27,R15          (036F)				; add '68' to get further into keybuf
	00F03496: ADD     R27,R01          (0361)				; add '68' to double R1 index (136)

; Copy dwords from cubbuf to keybuf, highest offset first.
; NOTE(review): the JR nn appears to test the flags left by the SUBQ (the
; LOAD is not expected to change flags), so the loop ends when the index goes
; negative, not when the data does. The store in the delay slot still runs on
; that last pass.
altcopy:
	00F03498: SUBQ    $4,R01           (1881)				; decrement index by 4
	00F0349A: LOAD    (R14+R01),R13    (E82D)				; load data from cubbuf into r13
	00F0349C: JR      nn,altcopy (00F03498)      (D7B4)		; loop while the index is not negative
	00F0349E: STORE   R13,(R15+R01)    (F42D)				; delay slot, store data in keybuf
	
	00F034A0: MOVEI   divloop ($00F034B0),R07    (9807)		; store address of divloop in r7
	00F034A6: MOVEI   doritesh ($00F034E6),R12    (980C)	; store address of doritesh in r12
	00F034AC: MOVE    R04,R14          (888E)				; R14 = R4, the start of the key copy in decbuf (not the end of the buffer)
	00F034AE: MOVE    R03,R15          (886F)				; R15 = R3, the first non-zero dword in cubbuf (not the end of the buffer)

; Top of the reduction loop (re-entered from skiprtm once per bit in R6).
; R14 -> current top dword of the key copy in decbuf
; R15 -> current top dword of the value being reduced (cubbuf or keybuf)
; Compares the two top dwords to decide between a trial subtraction
; (skiplfm) and just shifting the key right by one bit (doritesh).
divloop:
	00F034B0: LOAD    (R14),R13        (A5CD)				; load key dword from decbuf into r13
	00F034B2: LOAD    (R15),R16        (A5F0)				; load value dword into r16
	00F034B4: CMP     R13,R16          (79B0)				; compare the two read dwords (R16 - R13)
	00F034B6: JUMP    c,(R12)          (D188)				; if that borrowed (key dword is larger), jump to doritesh
	00F034B8: OR      R13,R16          (29B0)				; delay slot, OR the two dwords together to test whether both are zero
	
; value dword was larger than or equal to the key dword
	00F034BA: JR      nz,skiplfm (00F034C6)      (D4A1)		; result of the or, if not zero, jump to skiplfm
	00F034BC: MOVE    R05,R01          (88A1)				; delay slot - copy address of last key dword into r1
	
; both top dwords are zero: move all three pointers on by one dword and retry
	00F034BE: ADDQ    $4,R14           (088E)				; increment decbuf address by 4
	00F034C0: ADDQ    $4,R15           (088F)				; increment value address by 4
	00F034C2: JR      divloop (00F034B0)         (D6C0)		; jump back up to divloop
	00F034C4: ADDQ    $4,R08           (0888)				; delay slot, increment keybuf address by 4

; Trial subtraction: (value at R15) - (key at R14), written to the spare
; buffer at R8, working from the last dword back to the first.
; R18 must be 0 on entry; it is used to carry the borrow across the
; flag-changing SUBQ on the index.
skiplfm:
	00F034C6: SUB     R14,R01          (11C1)				; R1 = R5 - R14, byte offset of the last key dword from the current top
	00F034C8: MOVE    R08,R09          (8909)				; copy spare buffer address to r9
	00F034CA: ADD     R01,R09          (0029)				; add the offset: R9 = matching last dword in the spare buffer

subloop:
	00F034CC: LOAD    (R14+R01),R13    (E82D)				; load key dword from decbuf into r13
	00F034CE: LOAD    (R15+R01),R16    (EC30)				; load value dword into r16
	00F034D0: SUBC    R13,R16          (15B0)				; subtract with borrow: value - key
	00F034D2: STORE   R16,(R09)        (BD30)				; store result in the spare buffer
	00F034D4: ADDC    R18,R18          (0652)				; get carry (borrow) into R18 (was 0)
	00F034D6: SUBQ    $4,R01           (1881)				; decrement index by 4
	00F034D8: SUBQT   $4,R09           (1C89)				; decrement (no flags) output pointer by 4
	00F034DA: JR      nn,subloop (00F034CC)      (D714)		; if main index not negative, branch back to subloop
	00F034DC: SHARQ   $1,R18           (6C32)				; delay slot, shift r18 right (shifts saved borrow back into the carry flag, leaves R18 = 0)
	
	00F034DE: JR      c,doritesh (00F034E6)       (D468)	; if the final borrow was set (ie: the value was less than the key), discard the result and branch
	00F034E0: MOVE    R08,R09          (8909)				; delay slot, load spare buffer address into r9
	
; No borrow: keep the difference by swapping the value and spare buffers
	00F034E2: MOVE    R15,R08          (89E8)				; old value buffer becomes the spare buffer
	00F034E4: MOVE    R09,R15          (892F)				; buffer holding the difference becomes the value

; Shift the key copy in decbuf right by one bit, from its top dword (R14)
; through its last dword (R5). The bit that falls out of each dword is
; moved into the top bit of the next one.
doritesh:
	00F034E6: MOVE    R14,R09          (89C9)				; copy address of the top key dword to R9 (start of the key copy, not the end)
	00F034E8: MOVEQ   0,R16            (8C10)				; load zero in R16 (nothing shifted in yet)

nextrite:
	00F034EA: MOVEQ   0,R18            (8C12)				; load zero in r18
	00F034EC: LOAD    (R09),R13        (A52D)				; read data from decbuf into r13
	00F034EE: SHRQ    $1,R13           (642D)				; shift read data right by one bit
	00F034F0: ADDC    R18,R18          (0652)				; get shifted-out bit into R18 from carry
	00F034F2: OR      R16,R13          (2A0D)				; or r16 (bit from the previous dword, initially 0) into the data
	00F034F4: STORE   R13,(R09)        (BD2D)				; save result back into decbuf
	00F034F6: MOVE    R18,R16          (8A50)				; copy the shifted-out bit into r16
	00F034F8: RORQ    $1,R16           (7430)				; roll bit around to msb
	00F034FA: CMP     R05,R09          (78A9)				; see if we are at the last key dword (R5)
	00F034FC: JR      c,nextrite (00F034EA)       (D6C8)	; loop if not done yet
	00F034FE: ADDQ    $4,R09           (0889)				; delay slot, increment decbuf pointer by 4
	
	00F03500: OR      R16,R16          (2A10)				; test r16 (contains the last shifted-out bit in MSB)
	00F03502: JR      z,skiprtm (00F0350A)       (D462)		; if zero, jump to skiprtm
	00F03504: MOVEQ   0,R18            (8C12)				; delay slot, load 0 in R18 (subloop relies on R18 being 0)
	
; a bit fell off the last dword: the key copy grows by one dword
	00F03506: STORE   R16,(R09)        (BD30)				; store that bit into the next dword of decbuf
	00F03508: MOVE    R09,R05          (8925)				; and make that dword the new last key dword

skiprtm:
	00F0350A: SUBQ    $1,R06           (1826)				; decrement r6 (remaining bit count set up after msbcheck)
	00F0350C: JUMP    nn,(R07)         (D0F4)				; if not negative, loop back to divloop
	00F0350E: NOP                      (E400)				; delay slot

; Reduction finished. Turn the result (pointed to by R15) into output bytes.
; Each output byte is the low 8 bits of a running sum: the previous sum plus
; the next data byte. The sum is carried from block to block in R0 of the
; other register bank. Offsets 64 down to 4 of the result are converted
; (16 dwords = 64 output bytes per block) and written after 'startcode'.
; The dword at offset 0 is then compared with 21 as the block validity check.
shiftdon:
	00F03510: MOVEFA  R00,R17          (9411)				; get running sum into r17
	00F03512: MOVEFA  R01,R14          (942E)				; get output address (initially 'startcode') into r14
	00F03514: MOVEFA  R02,R07          (9447)				; get block count from cart into r7
	00F03516: MOVE    R27,R01          (8B61)				; get '68' into r1 for index
	00F03518: MOVEI   $000000FF,R22    (9816)				; get byte mask into r22

nextrol:
	00F0351E: MOVEQ   4,R02            (8C82)				; load '4' into R2 for byte count
	00F03520: SUBQ    $4,R01           (1881)				; subtract 4 from r1 index byte countdown
	00F03522: MOVEQ   0,R16            (8C10)				; load 0 into r16 for work
	00F03524: LOAD    (R15+R01),R13    (EC2D)				; read dword from keybuf or cubbuf (depends on the swaps above)

rollong:
	00F03526: ADD     R13,R17          (01B1)				; add data into running sum
	00F03528: RORQ    $18,R16          (7710)				; rotate work register right by 24 bits ($18), i.e. left by one byte
	00F0352A: AND     R22,R17          (26D1)				; mask running sum so we have just the one byte
	00F0352C: OR      R17,R16          (2A30)				; or into work register
	00F0352E: SUBQ    $1,R02           (1822)				; decrement byte count
	00F03530: JR      nz,rollong (00F03526)      (D741)		; while not zero, loop
	00F03532: RORQ    $8,R13           (750D)				; delay slot, rotate read data for next byte
	
	00F03534: ADDQ    $4,R14           (088E)				; increment output address by 4 (because we inc first, data starts at bootII)
	00F03536: CMPQ    $4,R01           (7C81)				; check if R1 is down to offset 4
	00F03538: JR      nz,nextrol (00F0351E)      (D641)		; if not, loop back to nextrol
	00F0353A: STORE   R16,(R14)        (BDD0)				; delay slot, save work register at output address

; done the output loop	
	00F0353C: MOVETA  R17,R00          (9220)				; store the new running sum byte
	00F0353E: MOVETA  R14,R01          (91C1)				; store the new write address
	00F03540: ADDQ    $1,R07           (0827)				; increment the cart block count (it counts up towards zero)
	00F03542: MOVEQ   21,R17           (8EB1)				; load '21' into r17
	00F03544: LOAD    (R15),R13        (A5ED)				; load the dword at offset 0 of the result (keybuf/cubbuf, depends on above setup)
	00F03546: CMP     R13,R17          (79B1)				; compare it against the expected value 21
	00F03548: JR      nz,cartbad (00F03560)      (D561)		; check value doesn't match, jump
	00F0354A: AND     R22,R07          (26C7)				; delay slot, mask block count to one byte (sets the flags for the JUMP below)
	
	00F0354C: JUMP    nz,(R29)         (D3A1)				; if not done, jump back to blkloop to load the next block
	00F0354E: MOVETA  R07,R02          (90E2)				; delay slot, store remaining block count
	
; At this point, all blocks are read successfully and written starting at 'startcode'
; all check values have passed. The only failure path so far is a block whose
; check value did not match, which goes to cartbad instead.
	00F03550: MOVEI   magic1 ($ABCDEFFF),R04    (9804)		; Get magic value into R4

; This label is used as a stopping point when wiping GPU RAM
; after the cartridge test (on success or failure!)
erastop:
	00F03556: MOVEI   G_RAM ($00F03000),R02    (9802)		; get address of GPU RAM start in R2 (place to write token)
	00F0355C: JR      cartgood (00F0356C)         (D4E0)	; branch ahead to good processing
	00F0355E: NOP                      (E400)				; delay slot

; Reached when a block's check value was wrong.
cartbad:
	00F03560: MOVEI   magic2 ($12345678),R04    (9804)		; get magic value into R4
	00F03566: MOVEI   bootII ($00F035AC),R02    (9802)		; Get address of decrypted block into R2 (place to write token)

; Both 'good' and 'bad' routines come into this block of code.
; R4 = magic value (magic1 on success, magic2 on failure)
; R2 = second wipe pointer (G_RAM on success, bootII on failure)
cartgood:
	00F0356C: MOVE    R04,R03          (8883)				; copy magic value
	00F0356E: MOVEI   G_RAM ($00F03000),R00    (9800)		; Get base of GPU RAM into R0
	00F03574: MOVEI   erastop ($00F03556),R01    (9801)		; Get address of erastop into R1 so we know when to stop!

; This block destroys GPU memory up to 'erastop', to make it
; harder to reverse the decryption code. In the case of a
; bad checksum, it also destroys the decrypted data. One thing
; of note is that the magic value in R3 is only written
; to the first address, the rest of the writes will be zeros.
cartclr:
	00F0357A: STORE   R03,(R00)        (BC03)				; write value at R0 (G_RAM)
	00F0357C: STORE   R03,(R02)        (BC43)				; also write at R2 (either G_RAM or bootII)
	00F0357E: ADDQ    $4,R00           (0880)				; Increment both pointers by 4 (32-bits)
	00F03580: ADDQ    $4,R02           (0882)
	00F03582: CMP     R01,R00          (7820)				; compare R0 against the limit (erastop)
	00F03584: JR      c,cartclr (00F0357A)       (D748)		; loop while R0 is still below the limit
	00F03586: MOVEQ   0,R03            (8C03)				; delay slot, clears R3 so subsequent writes are 0
	
; Memory overwrite complete	
	00F03588: OR      R04,R04          (2884)				; check if the original magic value was negative ($abcdefff is!)
	00F0358A: JR      n,bootIIa (00F0359A)       (D4F8)		; If so, jump ahead to start the decrypted code
	00F0358C: MOVEQ   0,R03            (8C03)				; delay slot, clr R3 (which should already be 0...)

; Here, we have given up on the boot, and need to halt the GPU so the 68k will see that.	
	00F0358E: MOVEI   G_CTRL ($00F02114),R31    (981F)		; Get the address of G_CTRL

gameover:
	00F03594: STORE   R03,(R31)        (BFE3)				; write the 0 in R3 to G_CTRL, which will stop the GPU
	00F03596: JR      gameover (00F03594)         (D7C0)	; wait for it to take effect
	00F03598: NOP                      (E400)				; delay slot

; If we reach here, we are happy with the code we decrypted
; so we go ahead and dive into it. These parameters set up
; addresses to swap that lets the decrypted code verify
; itself (a prevention of running games on clone hardware?)
; These parameters only mean anything if the encrypted code
; uses them (and Atari's encrypted code does - both standard
; and universal 'AB')
; Values handed to the decrypted code:
;   R08 = swapaddr1, R25 = swapaddr2, R09 = 64 (16 << 2), R05 = 8
bootIIa:
	00F0359A: MOVEI   swapaddr1 ($00F03668),R08    (9808)	; first address to swap in decrypted code
	00F035A0: MOVEI   swapaddr2 ($00F035D0),R25    (9819)	; second address to swap in decrypted code
	00F035A6: MOVEQ   16,R09           (8E09)				; The number 16 in R9

; 'startcode' is the initial output pointer for the decrypted data; the output
; loop adds 4 before its first store, so these two instructions are left
; intact and the decrypted code begins at bootII.
startcode:
	00F035A8: SHLQ    $2,R09           (63C9)				; multiply R9 by 4 (to get 64), step for first address (second is fixed to 4)
	00F035AA: MOVEQ   8,R05            (8D05)				; Store '8' in R5, number of swaps to do

; Disassembly of the standard cartridge header - normally encrypted!
; This code is decrypted from the cartridge and then runs.
; The universal type AB header has just a few differences, which I will note inline
; I will also note areas known to be hard patched by the encryption tool
; ('encryption tool') or the JagCD BIOS ('JagCD')

; This first block here swaps the instructions at the specified addresses
; The Jaguar BIOS specified the addresses and count, so it's possible this code won't
; run without the swap occurring, thus making it even harder to reverse the console
; or run a clone system (except that you can bypass all this code... but that
; was probably the intent).
; The code below is deswapped. The list of addresses swapped by default is:
; 00f03668	00f035d0
; 00f036A8	00f035d4
; 00f036E8	00f035d8
; 00f03728	00f035dc
; 00f03768	00f035e0
; 00f037a8	00f035e4
; 00f037e8	00f035e8
; 00f03828	00f035ec
; Note each swap is 4 bytes, so usually two instructions
;
; decrypted GPU code is placed here and then executed if the checksums matched.
; Interesting data, on entry the flags are all reset (thanks to the SHLQ of a constant value above!)
; Note that the JagCD skips this block and enters at the hash start. Thus it does
; the descramble itself?
bootII:
	00F035AC: LOAD    (R08),R00        (A500)				; read from R8 (initially $0f03668) 
into r0
	00F035AE: LOAD    (R25),R02        (A722)				; read from R25 (initially $0f035d0) 
into r2
	00F035B0: STORE   R00,(R25)        (BF20)				; write value from r8 into r25
	00F035B2: STORE   R02,(R08)        (BD02)				; write value from r25 into r8
	
	00F035B4: ADD     R09,R08          (0128)				; add R9 (set to 64 by bios) to R8 	
	00F035B6: SUBQ    $1,R05           (1825)				; subtract 1 from r5 (loop counter, 
set to 8 by bios)
	00F035B8: JR      nz,bootII (00F035AC)      (D721)		; if not zero yet, jump back to continue 
descrambling
	00F035BA: ADDQ    $4,R25           (0899)				; delay slot, add 4 to R25 
(Swapaddr2)
	
; NOTE: The code below is all POST descramble (ie: the code block above has already run!)	
	
	00F035BC: MOVEI   hashstart ($00F035F4),R02    (9802)	; load jump address in R2
	00F035C2: MOVEI   md5buf ($00F036FC),R07    (9807)		; load work buffer address in R7	
	00F035C8: JUMP    (R02)            (D040)				; jump to hashstart
	00F035CA: NOP                      (E400)				; delay slot

; this data should be different on every cartridge.
; The 128-bit MD5 hash is stored in here, but the entire
; byte block is scrambled because that is the size of
; an encrypted header block. There's no used code in
; these unused blocks, so they can be considered random.
; This entire area is modified by the encryption tool and
; should not contain code if you use that.
unused1:	
	00F035CC: bss 20	; random data

MD5HASH:	
	00F035E0: dc.l $00000000,$00000000,$00000000,$00000000	; actual hash depends on cartridge

unused2:
	00F035F0: bss 4		; random data (only 4 bytes: hashstart begins at 00F035F4)

jagCDEnter:
hashstart:
; This does an MD5 hash of the cart from $8002c0 to $A00000 (on a 2MB cart)
; This is also where the JagCD enters!
; Before the JagCD enters, it relocates MOVEIs from GPU to DSP addresses
; The affected data is at (using the GPU addresses):
; F035F6, F03626, F0362C, F036FC, F03748, F03794, F037E0, F036CA
; (Not 100% sure the last one is done, but it should be). Note that the comments
; in the original say 5 MOVEIs, and the code confusingly compares against
; the number five, but in truth there are 8 (maybe 7) relocations. In RSAM.DAS
; this table is at relotab and the code is around it.
; Note that only the first four relocations seem meaningful! Not sure if the
; rest are meant as obfuscation or what...
; Remember, of course, that the DSP RAM is at $F1B000, not $F03000, so
; all offsets in there vary appropriately.

	00F035F4: MOVEI   packbuf ($00F0382C),R21    (9815)		; load packbuf into R21 (JagCD patches this 
MOVEI data to DSP RAM)
	00F035FA: MOVEQ   31,R11           (8FEB)				; store '31' into R11 (half of one 
MD5 operation)
	00F035FC: MOVE    R11,R22          (8976)				; copy count into R22
	00F035FE: ADD     R22,R22          (02D6)				; and double it
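	00F03600: MOVEI   md5first ($008002C0),R15    (980F)	; cartridge memory after encrypted section? in R15 
(patched by encryption tool!) (note: 'md5first' here names the cart start address $008002C0, not the md5first loop label at $00F03630)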
	00F03606: MOVEI   md5last ($00A00000),R10    (980A)		; limit in R10 (note: 4MB header has $00C00000 
here) (patched by encryption tool!)
	00F0360C: MOVEI   md5hash0 ($67452301),R16    (9810)	; MD5 hash 0 in R16
	00F03612: MOVEI   md5hash1 ($EFCDAB89),R17    (9811)	; MD5 hash 1 in R17
	00F03618: MOVEI   md5hash2 ($98BADCFE),R18    (9812)	; MD5 hash 2 in R18
	00F0361E: MOVEI   md5hash3 ($10325476),R19    (9813)	; MD5 hash 3 in R19
	00F03624: MOVEI   md5second ($00F0364C),R24    (9818)	; load address of inner loop in R24 (JagCD patches 
this MOVEI data to DSP RAM)
	00F0362A: MOVEI   md5first ($00F03630),R20    (9814)	; load address of outer loop in R20 (JagCD patches 
this MOVEI data to DSP RAM)

md5first:
	00F03630: MOVE    R07,R08          (88E8)				; copy address of md5buf to R8
	00F03632: MOVE    R21,R14          (8AAE)				; copy address of packbuf to r14
	00F03634: MOVEQ   16,R09           (8E09)				; load value 16 into R9 for index
rdloop:
	00F03636: LOAD    (R15),R00        (A5E0)				; get value from cartridge into R0
	00F03638: ADDQ    $4,R15           (088F)				; increment cart address by 4
	00F0363A: STORE   R00,(R14)        (BDC0)				; store data into packbuf
	00F0363C: SUBQ    $1,R09           (1829)				; decrement index counter
	00F0363E: JR      nz,rdloop (00F03636)      (D761)		; if not zero, keep reading (so we read 16 
dwords or 64 bytes)
	00F03640: ADDQT   $4,R14           (0C8E)				; delay slot, increment packbuf ptr 
by 4
	
	00F03642: MOVE    R16,R26          (8A1A)				; copy md5h0 to r26
	00F03644: MOVE    R17,R27          (8A3B)				; copy md5h1 to r27
	00F03646: MOVE    R18,R28          (8A5C)				; copy md5h2 to r28
	00F03648: MOVE    R19,R29          (8A7D)				; copy md5h3 to r29
	00F0364A: MOVE    R21,R14          (8AAE)				; set r14 back to beginning of 
packbuf

md5second:
	00F0364C: LOAD    (R08),R06        (A506)				; read from md5buf table (will get 
continue address md5con $F03664)
	00F0364E: ADDQ    $4,R08           (0888)				; increment work pointer by 4
	00F03650: LOAD    (R08),R04        (A504)				; read next dword from md5buf
	00F03652: ADDQ    $4,R08           (0888)				; increment work pointer by 4
	00F03654: MOVE    R04,R03          (8883)				; copy data 2 to R3
	00F03656: AND     R11,R03          (2563)				; mask it with 31 ($1f)
	00F03658: RORQ    $10,R04          (7604)				; swap low and high words of original 
data 2
	00F0365A: AND     R11,R04          (2564)				; again, mask this one with 31 ($1f) 
too
	00F0365C: LOAD    (R08),R05        (A505)				; read next dword from md5buf into R5
	00F0365E: ADDQ    $4,R08           (0888)				; increment work pointer by 4
	00F03660: JUMP    (R06)            (D0C0)				; jumps to first read address - will 
be md5con $F03664 (ie: next)
	00F03662: MOVEQ   16,R09           (8E09)				; delay slot, reload R9 countdown 
index with 16 dwords
	
md5con:
	00F03664: MOVE    R27,R00          (8B60)				; copy md5h1 to R0
	00F03666: MOVE    R27,R01          (8B61)				; copy md5h1 to R1
	00F03668: AND     R28,R00          (2780)				; 'and' R0 copy with hash2
	00F0366A: NOT     R01              (3001)				; invert R1 copy
	00F0366C: JR      lbl3 (00F0367A)         (D4C0)		; jump ahead
	00F0366E: AND     R29,R01          (27A1)				; delay slot, 'and' inverted copy 
with hash3
	
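; This block is consistent in all headers I looked at. Nothing in this listing branches
; to it directly, but it is not random: together with the OR in lbl3's delay slot it computes
; (R29 & R27) | (~R29 & R28), which has the form of the MD5 round-2 function G (md5con above
; is the round-1 function F). It is presumably entered through JUMP (R06), using a continue
; address read from a later md5buf record (those records are not listed below) - TODO confirm.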
unused3:
	00F03670: MOVE    R29,R00          (8BA0)
	00F03672: MOVE    R29,R01          (8BA1)
	00F03674: AND     R27,R00          (2760)
	00F03676: NOT     R01              (3001)
	00F03678: AND     R28,R01          (2781)

lbl3:
	00F0367A: JR      lbl4 (00F0368E)        (D520)			; jump again (more obfuscation?)
	00F0367C: OR      R01,R00          (2820)				; delay slot, merge hashed R1 to 
hashed R0
	
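; This block is consistent in all headers I looked at. Nothing in this listing branches
; to it directly, but it holds two more hash functions that both end up at lbl4:
;   $F0367E: R29 ^ R27 ^ R28 (last XOR in the JR delay slot) - form of MD5 round-3 function H
;   $F03686: (~R29 | R27) ^ R28, falling through to lbl4    - form of MD5 round-4 function I
; Presumably each is entered through JUMP (R06), using a continue address read from a later
; md5buf record (those records are not listed below) - TODO confirm.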
unused4:
	00F0367E: MOVE    R29,R00          (8BA0)
	00F03680: XOR     R27,R00          (2F60)
	00F03682: JR      lbl4 (00F0368E)         (D4A0)
	00F03684: XOR     R28,R00          (2F80)
	00F03686: MOVE    R29,R00          (8BA0)
	00F03688: NOT     R00              (3000)
	00F0368A: OR      R27,R00          (2B60)
	00F0368C: XOR     R28,R00          (2F80)
	
lbl4:	
	00F0368E: LOAD    (R14+R03),R01    (E861)				; get data from packbuf plus data2 
(masked) offset into R1
	00F03690: ADD     R04,R03          (0083)				; add top word of data2 to R3
	00F03692: ADD     R01,R00          (0020)				; add read data into R0 work hash
	00F03694: LOAD    (R08),R01        (A501)				; read next dword from md5buf into r1
	00F03696: AND     R22,R03          (26C3)				; mask R3 to 62 ($3e)
	00F03698: ADD     R01,R00          (0020)				; add read data 4 to R0 work hash
	00F0369A: ADDQ    $4,R08           (0888)				; increment md5buf work pointer by 4
	00F0369C: ADD     R26,R00          (0340)				; add md5h0 to R0 work hash
	00F0369E: MOVE    R05,R12          (88AC)				; copy md5buf data 3 to R12
	00F036A0: RORQ    $8,R05           (7505)				; roll R5 (md5buf data 3) right by 
one byte
	00F036A2: AND     R11,R12          (256C)				; mask md5buf data 3 original to 31 
($1f)
	00F036A4: ROR     R12,R00          (7180)				; rotate right R0 work hash by result
	00F036A6: ADD     R27,R00          (0360)				; add md5h1 into R0 work hash
	00F036A8: SUBQ    $1,R09           (1829)				; decrement R9 (dword counter) by 1
	00F036AA: MOVE    R29,R26          (8BBA)				; copy md5h3 into r26 (overwriting 
md5h0)
	00F036AC: MOVE    R28,R29          (8B9D)				; copy md5h2 into r29 (overwriting 
md5h3)
	00F036AE: MOVE    R27,R28          (8B7C)				; copy md5h1 into r28 (overwriting 
md5h2)
	00F036B0: JUMP    nz,(R06)         (D0C1)				; if the dword counter did not reach 
0, loop back to md5con (note hashes changed!)
	00F036B2: MOVE    R00,R27          (881B)				; delay slot, move R0 work hash into 
R27 (overwriting md5h1)
	
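	00F036B4: CMP     R08,R21          (7915)				; check if the md5buf work pointer (R8) has reached packbuf (R21), ie: end of the md5buf table
	00F036B6: JUMP    nz,(R24)         (D301)				; if not, jump back to md5second to read the next 
table record and run another 16 steps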
	00F036B8: NOP                      (E400)				; delay slot
	
	00F036BA: ADD     R26,R16          (0350)				; add md5h0 (except not really 
anymore) to original md5h0
	00F036BC: ADD     R27,R17          (0371)				; add md5h1 (except not really 
anymore) to original md5h1
	00F036BE: ADD     R28,R18          (0392)				; add md5h2 (except not really 
anymore) to original md5h2
	00F036C0: ADD     R29,R19          (03B3)				; add md5h3 (except not really 
anymore) to original md5h3
	00F036C2: CMP     R10,R15          (794F)				; check if we are done reading the 
cartridge yet
	00F036C4: JUMP    c,(R20)          (D288)				; if not, jump to md5first
	00F036C6: MOVE    R07,R08          (88E8)				; delay slot, reset md5buf work 
pointer
	
	00F036C8: MOVEI   cartbad ($00F03560),R06    (9806)		; store address of cartbad into R6		
	00F036CE: SUBQ    $4,R25           (1899)				; subtract 4 bytes from swapaddr2 (to 
get last swapped address)

;--- Standard Header ---
	00F036D0: LOAD    (R25),R00        (A720)				; read that data into R0 (from 
md5hash+12 - 00f035ec)
	00F036D2: CMP     R19,R00          (7A60)				; compare against calculated hash 
md5h3
;-----------------------
;-- Universal Header ---
	00F036D0: JR      skiptest (00F036EE)         (D5C0)	; jump past md5 test
	00F036D2: NOP                      (E400)				; delay slot
;-----------------------

; this block is jumped over in the universal header
; test the 128 bit MD5 against what's in the cart header
	00F036D4: JUMP    nz,(R06)         (D0C1)				; if md5hash does not match, jump to 
cartbad routine
	00F036D6: SUBQ    $4,R25           (1899)				; delay slot, decrement swapaddr2 by 
four again (only used if jump not taken)
	
	00F036D8: LOAD    (R25),R00        (A720)				; read data into R0 (from md5hash+8 - 
00f035e8)
	00F036DA: CMP     R18,R00          (7A40)				; compare against calculated hash 
md5h2
	00F036DC: JUMP    nz,(R06)         (D0C1)				; if it does not match, jump to 
cartbad
	00F036DE: SUBQ    $4,R25           (1899)				; delay slot, decrement swapaddr2 by 
four again (only used if jump not taken)
	
	00F036E0: LOAD    (R25),R00        (A720)				; read data into R0 (from md5hash+4 - 
00f035e4)
	00F036E2: CMP     R17,R00          (7A20)				; compare against calculated hash 
md5h1
	00F036E4: JUMP    nz,(R06)         (D0C1)				; if it does not match, jump to 
cartbad
	00F036E6: SUBQ    $4,R25           (1899)				; delay slot, decrement swapaddr2 by 
four again (only used if jump not taken)
	
	00F036E8: LOAD    (R25),R00        (A720)				; read data into R0 (from md5hash - 
00f035e0)
	00F036EA: CMP     R16,R00          (7A00)				; compare against calculated hash 
md5h0
	00F036EC: JUMP    nz,(R06)         (D0C1)				; if it does not match, jump to 
cartbad
	
skiptest:	
	00F036EE: NOP                      (E400)				; target of universal skip, also 
delay slot for above jump
	
	00F036F0: ADDQ    $6,R06           (08C6)				; add 6 to cartbad address (skips 
loading bad magic value)
	00F036F2: MOVEI   $03D0DEAD,R04    (9804)				; puts new magic value into R4 - this 
will release the 68k when written
	00F036F8: JUMP    (R06)            (D0C0)				; jump back to write the tokens and 
stop the GPU
	00F036FA: NOP                      (E400)				; delay slot, do nothing

md5buf:
; More than a work buffer - this table is used to sort of script the
; MD5 hashing code. Note it comes from the encrypted cart data so isn't exploitable
; First value is address to continue execution at (obfs - it's the 'next' address after the jump)
; Second value, low byte (masked to $1f), offset in packbuf to process with hash first
; Second value, low byte of high word added to low byte (masked to $3e)
; third value, low byte is count to rotate work hash by (masked to $1f)
; fourth value, added directly to hash
; The first value (md5con) is patched by the JagCD BIOS into DSP RAM
	00F036Fc: dc.l	md5con ($00F03664),$00040000,$0A0F1419,$D76AA478

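; The rest of the table is not listed here (if you find it's needed I can add it);
; there is actually data through to 00F0382A. It does appear to be used as a continuation
; of the above table: each pass through md5second reads 3 dwords (continue address, offset/step,
; rotate counts) and then one more dword per step for 16 steps, ie: 19 dwords ($4C bytes), and
; the code loops back to md5second until the work pointer reaches packbuf.
; $F036FC + 4*$4C = $F0382C = packbuf, so there are four such records, starting at
; $F036FC, $F03748, $F03794 and $F037E0 (the same addresses the JagCD relocates).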

packbuf:
	00F0382C: (work buffer)
	


version 0.1 - 15 Mar 2008
-first release

version 0.2 - not released
-fixed comment on size of blocks around MD5HASH

version 0.3 - 15 Feb 2009
-added updates on JagCD boot and encryption tool patching
-Source for actual boot found on AtariHQ CD, should help understanding RSA in the future


Send updates to tursi via harmlesslion.com

Click here for the main page