// compile.s
.include "macros.inc"

.section .text

// Reset the scratch word: point scratch_pointer at the first code byte of the
// entry that starts at the environment tail (tail address + 10, i.e. just
// past the 10-byte entry header).
//
// Out:      r4 = new scratch pointer value, r5 = address of scratch_pointer
// Clobbers: r4, r5, flags (neither register is saved here)
.type prepare_scratchword, %function
prepare_scratchword:
	ldr r5, =scratch_pointer  // r5 = where the scratch pointer lives
	ldr r4, =environment      // r4 = address of the environment tail pointer
	ldr r4, [r4]              // r4 = environment tail (start of the next entry)
	add r4, #10               // step over the entry header to the code area
	str r4, [r5]              // scratch pointer = start of code area
	bx lr

// Jump to the code in the scratch word, i.e. the code area of the entry at
// the environment tail. This is a tail-jump: lr is left untouched, so the
// scratch word's own return goes straight back to whoever called
// execute_scratchword.
//
// Clobbers: r4, flags (r4 holds the jump target when the scratch word starts)
.global execute_scratchword
.type execute_scratchword, %function
execute_scratchword:
	ldr r4, =environment
	ldr r4, [r4]          // r4 = environment tail (start of the scratch entry)
	add r4, #11           // beyond the header, plus 1 for thumb
	bx r4                 // bit 0 set keeps the core in Thumb state

// Append one halfword (typically a single 16-bit instruction) to the scratch
// word and advance the scratch pointer by two bytes.
//
// In:       r0 = halfword to append (low 16 bits are stored)
// Out:      r1 = address of scratch_pointer, r2 = updated scratch pointer
// Clobbers: r1, r2, flags; r0 is left unchanged
.type scratch_append_halfword, %function
scratch_append_halfword:
	ldr r1, =scratch_pointer
	ldr r2, [r1, #0]      // r2 = current write position
	strh r0, [r2, #0]     // emit the halfword
	add r2, r2, #2        // write position += 2
	str r2, [r1, #0]      // publish the new write position
	bx lr

// Append a 32-bit word to the scratch word and advance the scratch pointer by
// four bytes. The word is written as two halfword stores, low half first, so
// the write position only needs to be halfword aligned.
//
// In:       r0 = word to append
// Out:      r1 = address of scratch_pointer, r2 = updated scratch pointer
// Clobbers: r0 (left holding the upper halfword), r1, r2, flags
.type scratch_append_word, %function
scratch_append_word:
	ldr r1, =scratch_pointer
	ldr r2, [r1, #0]      // r2 = current write position
	strh r0, [r2, #0]     // low halfword goes to the lower address
	lsr r0, r0, #16       // bring the high halfword down
	strh r0, [r2, #2]     // high halfword goes to the upper address
	add r2, r2, #4        // write position += 4
	str r2, [r1, #0]      // publish the new write position
	bx lr

// Parse the literal token at the current input position as a hexadecimal
// number. The first character of the token (the literal prefix) is skipped;
// every following character must be 0-9 or lowercase a-f.
//
// In:       r8  = input buffer base
//           r10 = offset of the token within the buffer
//           r11 = token length, including the prefix character
// Out:      r0  = parsed value
// Clobbers: r1-r5, flags (r4 and r5 are not saved here)
//
// The accumulator is shifted left by four bits per digit with no overflow
// check, so only the last eight digits of a longer token survive. The loop
// tests for the end only after consuming a character, so at least one
// character after the prefix is always examined.
// On an invalid character lr is pushed and _err_halt is called; if _err_halt
// were to return, execution would continue at the accumulate step.
.global parse_lit
.type parse_lit, %function
parse_lit:
	mov r4, r8                // r4 = input buffer base
	mov r1, r11               // r1 = token length
	mov r0, r10               // r0 = token offset
	add r5, r0, r1            // r5 = offset one past the end of the token
	add r3, r0, #1            // r3 = offset of the first digit (prefix skipped)
	mov r0, #0                // r0 = value accumulated so far
_parse_lit_next:
	ldrb r2, [r4, r3]         // fetch the next character
	cmp r2, #'0'
	blt _parse_lit_bad        // below '0': not a hex digit
	cmp r2, #'9'
	bgt _parse_lit_letter     // above '9': may still be a-f
	sub r2, #0x30             // '0'..'9' -> 0..9
	b _parse_lit_accumulate

_parse_lit_letter:
	cmp r2, #'a'
	blt _parse_lit_bad        // between '9' and 'a': not a hex digit
	cmp r2, #'f'
	bgt _parse_lit_bad        // above 'f': not a hex digit
	sub r2, #0x57             // 'a'..'f' -> 10..15
	b _parse_lit_accumulate

	// malformed literal
_parse_lit_bad:
	push {lr}
	bl _err_halt

_parse_lit_accumulate:
	lsl r0, #4                // make room for one more hex digit
	orr r0, r2                // and drop it into the low nibble
	add r3, #1                // advance to the next character
	cmp r3, r5                // reached the end of the token?
	bne _parse_lit_next       // no: keep going
	bx lr

// Instruction template for calling code through an absolute 32-bit address.
// It is laid out so that the target address can be placed directly after it,
// at the position of label 1. No user of this template is visible in this
// part of the file.
//
// The return address is computed by hand: "mov r0, pc" reads the address of
// that instruction plus 4, and (1f - . + 3) is the distance from the add to
// label 1 plus 3. The sum is (label 1 + 4) with bit 0 set, i.e. the
// Thumb-state address of whatever follows the 4-byte target address.
//
// NOTE(review): "ldr r0, 1f" is a PC-relative load, which addresses from the
// PC rounded down to a word boundary. A copy of this template therefore
// presumably has to start on a 4-byte boundary for the load to hit the target
// word -- confirm wherever the template is used.
.align 2
_indirect_word_template:
	mov r0, pc
	add r0, #(1f - . + 3)
	mov lr, r0            // lr = return address just past the target word
	ldr r0, 1f            // r0 = target address stored after the template
	bx r0
	nop                   // pads the template to 12 bytes
1:	// target address goes here

// Instruction template for pushing a full 32-bit literal. compile_lit copies
// the 8 bytes of this template into the scratch word and appends the literal
// itself, so the literal ends up at the position of label 1.
//
// At run time the code loads the literal, pushes it, and branches over the
// 4 literal bytes ("1f + 4") to the next compiled instruction.
//
// NOTE(review): "ldr r0, 1f" is a PC-relative load, which addresses from the
// PC rounded down to a word boundary. The encoded offset is only correct when
// the copy starts on a 4-byte boundary -- confirm the alignment at the point
// where the template is emitted.
_indirect_lit_template:
	ldr r0, 1f            // r0 = literal stored after the template
	push {r0}
	b 1f + 4              // skip the literal word
	nop                   // pads the template to 8 bytes
1:	// literal word goes here

// Instruction template emitted at the start of every compiled word.
// compile_header copies two words (8 bytes) starting at this label into the
// scratch word. The template copies lr into r0 and then expands the push_cs
// macro, which is not defined in this part of the file.
// NOTE(review): presumably push_cs saves r0 (the return address) on a call
// stack and expands to exactly 6 bytes, making the template 8 bytes long --
// confirm against the macro definition.
_word_header_template:
	mov r0, lr
	push_cs

// Instruction template emitted at the end of every compiled word.
// compile_footer copies three words (12 bytes) starting at this label into
// the scratch word. The template expands the pop_cs macro (not defined in
// this part of the file) and then returns through r0.
// NOTE(review): presumably pop_cs restores the saved return address into r0
// and the whole template is exactly 12 bytes long; if it is shorter, the copy
// also picks up the bytes that follow it -- confirm against the macro
// definition.
_word_footer_template:
	pop_cs
	bx r0
	nop

// Instruction template for pushing a literal that fits in 8 bits.
// compile_lit loads these two instructions as a single word (hence the word
// alignment), ORs the literal into the low byte, which is the immediate field
// of the mov, and appends the result to the scratch word.
.align 2
_lit_mov_template:
	mov r0, #0            // immediate is patched in by compile_lit
	push {r0}

// Compile the literal token at the current input position into the scratch
// word. The value comes from parse_lit (which reads r8/r10/r11).
//
//   * A value that fits in 8 bits is emitted as one word built from
//     _lit_mov_template: "mov r0, #value; push {r0}".
//   * Any other value is emitted as the 8 bytes of _indirect_lit_template
//     followed by the 32-bit value itself.
//
// The indirect form fetches the value with a PC-relative ldr, which addresses
// from the PC rounded down to a word boundary, so the template only works
// when it starts on a 4-byte boundary. The scratch pointer is only guaranteed
// to be halfword aligned (code starts 10 bytes into a word-aligned entry), so
// one halfword of padding is emitted first when it is not word aligned.
//
// Clobbers: r0-r3, r5, flags (r3 and r5 via parse_lit). r4 is saved and
// restored here.
.type compile_lit, %function
compile_lit:
	push {r4, lr}
	bl parse_lit              // r0 = literal value
	mov r1, #0xFF
	and r1, r0                // r1 = low byte of the value
	cmp r0, r1                // does the value fit in 8 bits?
	beq _compile_lit_byte
	push {r0}                 // keep the value while the template is emitted
	ldr r1, =scratch_pointer
	ldr r1, [r1]              // r1 = current write position
	mov r2, #2
	tst r1, r2                // write position on a word boundary?
	beq 1f                    // yes: no padding needed
	ldr r0, =0x46C0           // "mov r8, r8": a 16-bit no-op used as padding
	bl scratch_append_halfword
1:	ldr r4, =_indirect_lit_template
	ldr r0, [r4]              // ldr r0, <literal>; push {r0}
	bl scratch_append_word
	ldr r0, [r4, #4]          // b <past literal>; nop
	bl scratch_append_word
	pop {r0}
	bl scratch_append_word    // the literal itself
	pop {r4, pc}
_compile_lit_byte:
	ldr r4, =_lit_mov_template
	ldr r4, [r4]              // r4 = "mov r0, #0; push {r0}"
	orr r0, r4                // patch the value into the mov immediate
	bl scratch_append_word
	pop {r4, pc}

// Template for a 32-bit Thumb BL instruction. compile_word loads it as one
// word (hence the word alignment): the first halfword of the instruction is
// in bits 0-15 and the second in bits 16-31, which matches the order in which
// scratch_append_word writes them back out.
.align 2
_branch_template:
	// a BL instruction with the immediates masked out. All compiled
	// branches to words will be negative offsets, so we start with S, J1,
	// and J2 set for negative values.
	.short 0xF400         // first halfword: S = 1, imm10 = 0
	.short 0xF800         // second halfword: J1 = J2 = 1, imm11 = 0

// Compile a reference to an existing word into the scratch word.
//
// In: r0 = address of the word's entry header. Header fields used here:
//        +2  halfword  code length in bytes
//        +4  word      compile-time fixup routine, or 0 for none
//        +9  byte      flags; bit 0 set means "inline"
//        +10           start of the word's code
//
// An inline word has its code copied halfword by halfword into the scratch
// word. Any other word is compiled as a BL to its code. In both cases the
// fixup routine, if present, is called afterwards with r4 still holding the
// header address.
//
// r4-r6 are saved and restored. r0-r3 and flags are clobbered.
.type compile_word, %function
compile_word:
	push {r4, r5, r6, lr}
	mov r4, r0                // save beginning of word structure
	add r0, #10               // skip header
	// r0 is now the start of code

	ldrb r2, [r4, #9]         // get flags byte
	mov r3, #1
	and r2, r3                // select inline bit
	// the branch below relies on the and above having set the flags
	beq _compile_word_as_bl   // not inline? skip to branch builder

	// copy inline code
	ldrh r3, [r4, #2]         // get code length
	cmp r3, #0
	beq _compile_word_fixup   // if this is a zero-length word, we can skip this part
	ldr r6, =scratch_pointer
	ldr r5, [r6]              // load scratch pointer
	mov r2, #0                // initialize index
	// the index advances by 2 and is tested for equality, so the code length
	// must be even for the loop to terminate
1:	ldrh r1, [r0, r2]         // load instruction
	strh r1, [r5, r2]         // store instruction
	add r2, #2                // increment
	cmp r2, r3                // are we done?
	bne 1b                    // loop
	add r5, r3                // add the number of bytes copied to scratch pointer
	str r5, [r6]              // store it back
	b _compile_word_fixup     // continue at fixup step

// Every word reference compiles to a relative BL instruction, which is a full
// 4 bytes.
_compile_word_as_bl:
	ldr r1, =scratch_pointer
	ldr r1, [r1]              // r1 is now the location we're assembling to
	add r1, #4                // Add PC reference offset
	sub r0, r1                // calculate the offset
	lsr r0, #1                // chomp the lowest bit
	// r0 is now the branch offset value
	// (a logical shift is fine: only the low 21 bits are used below)

	ldr r2, =_branch_template
	ldr r2, [r2]              // loaded template
	mov r1, #0xFF
	lsl r1, #3
	mov r3, #0x7
	orr r1, r3                // calculate mask for lower bits
	// r1 = 0x7FF, the imm11 field
	and r1, r0                // mask lower bits
	lsl r1, #16               // shift into upper half
	orr r2, r1                // and deposit them into the template
	mov r1, #0xFF
	lsl r1, #2
	mov r3, #0x3
	orr r1, r3                // r1 = 0x3FF
	lsl r1, #11               // calculate mask for upper bits
	and r1, r0                // mask
	lsr r1, #11               // shift into lower half
	orr r2, r1                // deposit
	// r2 = complete BL: imm10 in the low halfword, imm11 in the high halfword
	mov r0, r2
	bl scratch_append_word
	// branch instruction has now been written to scratch area

_compile_word_fixup:
	ldr r0, [r4, #4]          // load the compile-time fixup
	cmp r0, #0
	beq 1f                    // No fixup? Skip it.
	blx r0
1:	pop {r4, r5, r6, pc}


// Turn the code currently in the scratch word into a named dictionary entry,
// then start a fresh scratch word after it. Called for a ":name" token.
//
// In:  r8  = input buffer base
//      r10 = offset of the token (the ':') within the buffer
//      r11 = token length, including the ':'
//
// Entry layout written at the old environment tail:
//      +0  halfword  total entry size (header + code + name + padding)
//      +2  halfword  code length in bytes
//      +4  word      compile-time fixup routine (set to 0)
//      +8  byte      name length
//      +9  byte      flags (set to 0)
//      +10           code, immediately followed by the name; the entry is
//                    then padded up to the next 4-byte boundary
//
// A zero word is written at the new tail, the environment tail pointer is
// moved there, and a new scratch word is prepared with its header compiled.
//
// A bare ":" token gives a name length of zero. The copy loop tests its exit
// condition only after copying a byte, so that case skips the loop and
// produces an entry with an empty name instead of overrunning memory.
//
// r4, r5 and r7 are saved and restored. r0-r3 and flags are clobbered.
.type define_word, %function
define_word:
	push {r4, r5, r7, lr}
	bl compile_footer         // go ahead and compile the end of the function
	ldr r4, =environment
	ldr r4, [r4]              // load environment tail pointer
	mov r7, r4                // store the beginning for later
	add r4, #10               // move beyond the header into the code start
	ldr r0, =scratch_pointer
	ldr r0, [r0]              // load scratch pointer
	sub r0, r4                // calculate code length
	strh r0, [r7, #2]         // store in header
	add r4, r0                // advance to start of word name
	mov r5, r8                // get input pointer
	mov r0, r10               // get text position
	add r5, r0
	add r5, #1                // move input pointer to start of name beyond colon
	mov r1, r11               // get word length
	sub r1, #1                // subtract the colon from word length
	strb r1, [r7, #8]         // store word length in header
	mov r0, #0                // initialize index
	strb r0, [r7, #9]         // also store zero in flags
	str r0, [r7, #4]          // zero compile-time support function pointer
	cmp r1, #0                // empty name (bare ':')?
	beq 2f                    // then there is nothing to copy
1:	ldrb r2, [r5, r0]         // load byte from name
	strb r2, [r4, r0]         // store byte
	add r0, #1
	cmp r0, r1                // compare length
	bne 1b                    // and loop
2:	add r4, r0                // move output pointer to end of string
	mov r0, #3
	and r0, r4                // are the lower two bits zero?
	beq 4f                    // if they are, we're good
	eor r4, r0                // otherwise clear the lower bits
	add r4, #4                // increment to the next word boundary
4:	sub r1, r4, r7            // calculate total size
	strh r1, [r7]             // store total size in header
	mov r0, #0
	str r0, [r4]              // zero next word start
	ldr r3, =environment
	str r4, [r3]              // store new tail pointer
	// cleanup
	bl prepare_scratchword    // reinitialize scratch word space
	bl compile_header         // place a new header there
	pop {r4, r5, r7, pc}

// Compile the contents of the input buffer into the scratch word.
//
// A fresh scratch word is prepared and given a header, then the input is
// processed one token at a time starting at offset 0:
//   * a token starting with '$' is compiled as a literal (compile_lit)
//   * a token starting with ':' names the code compiled so far (define_word)
//   * anything else is looked up with search_environment and compiled as a
//     word reference (compile_word)
// When the input is exhausted a footer is compiled and the routine returns.
//
// Register conventions visible here: r8 = input buffer base, r10 = offset of
// the current token, r11 = length of the current token.
// NOTE(review): get_next_word presumably locates the token at or after r10
// and sets r10/r11; search_environment presumably returns the entry address
// in r0 and sets the Z flag when the word is not found -- confirm against
// their definitions.
//
// r4 is overwritten and not restored.
.global compile
.type compile, %function
compile:
	push {lr}
	bl prepare_scratchword
	bl compile_header
	mov r0, #0
	mov r10, r0           // start at the beginning of the input buffer
1:
	bl get_next_word
	mov r4, r8
	mov r0, r10
	ldrb r2, [r4, r0]     // get the first character
	cmp r2, #'$'          // is it a literal value?
	beq _compile_lit
	cmp r2, #':'          // is it a word definition?
	beq _compile_def
	// else it's a word
	bl search_environment        // must be a word
	beq _compile_word_not_found
	bl compile_word       // compile the word
	b 4f
_compile_def:
	bl define_word
	b 4f
_compile_lit:
	bl compile_lit

4:	mov r0, r10
	mov r1, r11
	add r0, r1            // move input pointer to next word
	ldr r1, =input_counter
	ldrb r1, [r1]         // input_counter is read as a single byte
	sub r1, #1
	cmp r0, r1
	bge _compile_footer   // if the next position is at or beyond input_counter - 1, we're done
	mov r10, r0
	b 1b
_compile_footer:
	bl compile_footer
	// label 6 is not referenced from the code visible in this part of the file
6:	pop {pc}
_compile_word_not_found:
	ldr r0, =_word_not_found_str
	bl putstr
	b _err_halt           // branch, not call: control does not come back here

// Emit the word prologue: append the two words (8 bytes) found at
// _word_header_template to the scratch word.
//
// Clobbers: r0-r2, flags (through scratch_append_word). r4 is saved and
// restored.
.type compile_header, %function
compile_header:
	push {r4, lr}
	ldr r4, =_word_header_template
	ldr r0, [r4]              // first template word
	bl scratch_append_word
	add r4, #4                // step to the second template word
	ldr r0, [r4]
	bl scratch_append_word
	pop {r4, pc}

// Emit the word epilogue: append the three words (12 bytes) found at
// _word_footer_template to the scratch word.
//
// Clobbers: r0-r2, flags (through scratch_append_word). r4 is saved and
// restored.
.type compile_footer, %function
compile_footer:
	push {r4, lr}
	ldr r4, =_word_footer_template
	ldr r0, [r4]              // first template word
	bl scratch_append_word
	add r4, #4                // step to the second template word
	ldr r0, [r4]
	bl scratch_append_word
	add r4, #4                // step to the third template word
	ldr r0, [r4]
	bl scratch_append_word
	pop {r4, pc}