+.section .text
+
+// _err_halt: terminal error trap. Execution parks here forever.
+.p2align 2
+.global _err_halt
+_err_halt:
+ b . // branch to this same instruction
+
+// pstring STR: emit a counted string, one length byte followed by the
+// characters of STR (no terminator). The labels are made unique per expansion
+// so the macro does not disturb numeric local labels at the point of use.
+.macro pstring str:req
+ .byte .Lpstr_end\@ - .Lpstr_begin\@
+.Lpstr_begin\@:
+ .ascii "\str"
+.Lpstr_end\@:
+.endm
+
+// def_word NAME: open a dictionary record for the word NAME.
+// Emits a 4-byte aligned header, the name characters, and then the label _NAME
+// where the code of the word starts. The record must be closed with
+// def_end NAME, which defines the _NAME_code_end and _NAME_end labels that the
+// header fields refer to.
+// Header layout (halfwords, offsets from the record start):
+//   +0 total record size   +2 offset to the code start
+//   +4 code length         +6 name length
+// The name characters begin at offset 8.
+.macro def_word name:req
+ .align 2
+\name:
+ .short _\name\()_end - \name // total size
+ .short _\name - \name // offset to code start
+ .short _\name\()_code_end - _\name // code length (minus bx lr)
+ .short 1f - . - 2 // name length
+ .ascii "\name"
+1:
+ // pad so the code that follows starts on a 2-byte boundary
+ .align 1
+_\name:
+.endm
+
+// def_end NAME: close the record opened by def_word NAME.
+// Marks the end of the word body, appends the bx lr return, pads to a 4-byte
+// boundary and defines the end-of-record label used for the total size field.
+.macro def_end name:req
+_\name\()_code_end:
+ bx lr
+.align 2
+_\name\()_end:
+.endm
+
+// halt: spin forever by branching to the word's own code label
+// (_halt is the code-start label defined by def_word).
+def_word halt
+ b _halt
+def_end halt
+
+// push: store the current value of r0 on top of the stack.
+def_word push /* r0 onto stack */
+ push {r0}
+def_end push
+
+// pop: remove the top stack entry and leave it in r0.
+def_word pop /* r0 off of stack */
+ pop {r0}
+def_end pop
+
+// dup: duplicate the top stack entry. The value is popped into r0 and then
+// pushed back twice, so r0 also holds a copy afterwards.
+def_word dup // ( a -- a a )
+ pop {r0}
+ push {r0}
+ push {r0}
+def_end dup
+
+// add: replace the top two stack entries with their sum.
+// The top entry lands in r0 and the one below it in r1; the sum is left in r0
+// and pushed back.
+def_word add // ( a b -- a + b)
+ pop {r0, r1}
+ add r0, r0, r1
+ push {r0}
+def_end add
+
+// rep: push the current program counter value onto the stack.
+// NOTE(review): in Thumb state a read of pc yields the address of the reading
+// instruction plus 4, which here would be the bx lr appended by def_end, and
+// the value has bit 0 clear - confirm this is the address the jump word is
+// meant to consume.
+def_word rep
+ mov r0, pc
+ push {r0}
+def_end rep
+
+// jump: pop the top stack entry straight into pc, transferring control to it.
+// The bx lr appended by def_end is therefore not reached through this path.
+// NOTE(review): cores that interwork on pop {pc} need bit 0 of the popped
+// value set for Thumb code - verify against the target core.
+def_word jump
+ pop {pc}
+def_end jump
+
+// register_rom_word WORD: copy the def_word record labelled WORD into the RAM
+// dictionary by passing its address to add_rom_word in r0.
+.macro register_rom_word word:req
+ ldr r0, =\word
+ bl add_rom_word
+.endm
+
+// Reset_Handler: entry point. Loads the built-in program into the input
+// buffer, initialises the dictionary, registers the ROM words in the order
+// add, halt, rep, jump, and runs the interpreter. If the interpreter ever
+// returns, execution parks in _err_halt.
+.p2align 2
+.global Reset_Handler
+Reset_Handler:
+ // Disabled compiler path, kept for reference:
+ //   ldr r4, =program
+ //   ldr r5, =output
+ //   bl compiler
+ bl setup_input
+ bl setup_words
+
+ register_rom_word add
+ register_rom_word halt
+ register_rom_word rep
+ register_rom_word jump
+
+ bl interpret_input
+ b _err_halt
+
+// setup_input: copy the counted string at program into RAM.
+// The count byte goes to input_counter and the characters to the bytes that
+// follow it, so length + 1 bytes are copied in total.
+// Clobbers r0-r3 and flags.
+setup_input:
+ // Disabled alternative that only cleared the counter, kept for reference:
+ //   ldr r5, =input_counter
+ //   mov r0, #0
+ //   str r0, [r5]
+ ldr r0, =program // source cursor
+ ldr r2, =input_counter // destination cursor
+ ldrb r1, [r0] // string length
+ add r1, #1 // bytes left to copy, including the count byte
+1: ldrb r3, [r0]
+ strb r3, [r2]
+ add r0, #1
+ add r2, #1
+ sub r1, #1 // one byte fewer to go
+ bne 1b // keep copying until none remain
+ bx lr
+
+// setup_words: initialise an empty dictionary at output.
+// The word at output is the head pointer (address where the next record will
+// be written); it is set to output + 4. The word at output + 4 is cleared so
+// the list starts with its end-of-list marker.
+// Clobbers r0, r1, r5 and flags.
+setup_words:
+ ldr r5, =output
+ mov r0, #0
+ str r0, [r5, #4] // empty list: first record slot holds the end marker
+ add r1, r5, #4
+ str r1, [r5] // head points at the first record slot
+ bx lr
+
+// add_rom_word: append a ROM word record to the RAM dictionary.
+// r0 - in, address of a record laid out by def_word/def_end; its first
+//      halfword is the total record size in bytes.
+// The record is copied word by word to the current head address stored at
+// output, the head is advanced by the record size, and a zero word is written
+// at the new head as the end-of-list marker.
+// At least one word is always copied. The size is expected to be a multiple
+// of 4 (def_word and def_end align both ends of the record to 4 bytes).
+// Clobbers r1-r5 and flags.
+add_rom_word:
+ ldr r5, =output
+ ldr r1, [r5] // r1 is now the head address
+ ldrh r2, [r0] // get the number of bytes for the ROM word
+ mov r4, #0 // byte offset into the record
+
+1: ldr r3, [r0, r4] // load
+ str r3, [r1, r4] // store
+ add r4, #4
+ cmp r4, r2
+ blo 1b // unsigned compare: stop as soon as offset >= size, so an
+        // unexpected size can never make the copy run away
+
+ add r1, r2 // add to get the next head location
+ str r1, [r5] // store the new head
+ mov r2, #0
+ str r2, [r1] // and make sure the next word offset is end of list
+
+ bx lr
+
+// get_next_word: locate the next space-delimited word in the input buffer.
+// r8  - in, input buffer address
+// r10 - in/out, position to start scanning from / position of the word found
+// r11 - out, word length; 0 when nothing but spaces (or nothing at all)
+//       remains before the end of the input
+// The input length is read from input_counter. No byte at or beyond that
+// length is ever read.
+// Clobbers r0-r2, r4, r5 and flags.
+get_next_word:
+ mov r4, r8 // r4 is the local input buffer address
+ ldr r5, =input_counter
+ ldrb r5, [r5] // r5 is the input buffer length
+ mov r0, r10 // r0 is word position
+
+ // skip leading spaces, checking the bound before every load
+1: cmp r0, r5
+ bhs 5f // ran out of input without finding a word
+ ldrb r2, [r4, r0] // load char
+ cmp r2, #' '
+ bne 2f // not a space: the word starts here
+ add r0, #1 // space: advance and keep looking
+ b 1b
+
+ // r0 is the first character of the word; find where it ends
+2: mov r1, r0
+3: add r1, #1 // the character at r1 is already known to belong to the word
+ cmp r1, r5
+ bhs 4f // word runs up to the end of the input
+ ldrb r2, [r4, r1]
+ cmp r2, #' '
+ bne 3b // still inside the word
+
+4: sub r1, r0 // word length is end - beginning
+ mov r11, r1
+ mov r10, r0
+ bx lr
+
+ // no word found: report a zero length at the position where scanning stopped
+5: mov r1, r0
+ b 4b
+
+// search_dict: look up a word of the input in the RAM dictionary.
+// r10 - in, position of the word inside input_buffer
+// r11 - in, length of the word
+// r0  - out, address of the word code with bit 0 set (Thumb interworking
+//       address), or 0 if no record has this name
+// The list starts at output + 4. Each record begins with its total size as a
+// halfword, which is also the offset to the next record; a zero there marks
+// the end of the list. The name length is the halfword at offset 6 and the
+// name characters start at offset 8.
+// Clobbers r1-r5 and flags.
+search_dict:
+ ldr r4, =output
+ add r4, #4 // start at the first record
+1: ldrh r0, [r4] // load the offset to next word
+ cmp r0, #0 // ldrh does not update the flags, so test explicitly
+ beq 5f // zero marks the end of the list
+
+ ldrh r3, [r4, #6] // load the name length
+ cmp r3, r11 // is it the same as the length we have in r11?
+ beq 3f // string lengths are the same; compare bytes
+2: ldrh r0, [r4] // else load the offset to next word
+ add r4, r0 // add to the word pointer
+ b 1b // and try the next word
+
+3: mov r0, r4
+ add r0, #8 // r0 is the beginning of the word name string
+ ldr r5, =input_buffer
+ mov r1, r10 // load input buffer position
+ add r5, r1 // r5 is now a pointer to the word in the input buffer
+ mov r1, #0 // set up index
+4: ldrb r2, [r0, r1] // load dict byte
+ ldrb r3, [r5, r1] // load input byte
+ cmp r2, r3 // are they the same?
+ bne 2b // if not, go to the next dict word
+ add r1, #1 // increment
+ cmp r1, r11 // have we checked all the bytes in the word?
+ blo 4b // if not, check next byte
+
+ // success!
+ ldrh r2, [r4, #2] // get offset to code start
+ add r0, r4, r2 // add to word start
+ add r0, #1 // and add one to make it a valid thumb interwork address
+ bx lr
+
+ // the word was not found; set r0 to 0
+5: mov r0, #0
+ bx lr
+
+// interpret_input: run the words of the input buffer one after another.
+// For each word: a leading apostrophe hands it to push_str, a leading dollar
+// sign hands it to push_lit, anything else is looked up with search_dict and
+// the code found is called. Returns to the caller when a word is not in the
+// dictionary or when a word of length zero is reported.
+// Register roles while running:
+//   r8  - input buffer address
+//   r9  - return address of interpret_input (lr is overwritten by the calls)
+//   r10 - position of the current word in the buffer
+//   r11 - length of the current word
+interpret_input:
+ mov r9, lr
+ ldr r0, =input_buffer
+ mov r8, r0
+ mov r0, #0
+ mov r10, r0
+1:
+ bl get_next_word
+ mov r0, r11
+ cmp r0, #0
+ beq 5f // a zero-length word cannot name anything: stop here
+ mov r4, r8
+ mov r0, r10
+ ldrb r2, [r4, r0] // get the first character
+ cmp r2, #'\'' // is it an apostrophe?
+ bne 2f
+ bl push_str
+ b 4f
+2: cmp r2, #'$' // is it a dollar sign?
+ bne 3f
+ bl push_lit
+ b 4f
+3: bl search_dict // must be a word
+ cmp r0, #0 // test the result itself instead of trusting the flags
+            // left behind by the callee
+ beq 5f // word not found
+ blx r0 // branch to found word
+4: mov r0, r10
+ mov r1, r11
+ add r0, r1 // move input pointer to next word
+ mov r10, r0
+ b 1b
+5: bx r9
+
+// push_str: push the characters of a quoted word onto the stack.
+// r8  - in, input buffer address
+// r10 - in, position of the word (its first character is the apostrophe)
+// r11 - in, length of the word including the apostrophe
+// Every character after the apostrophe is pushed as one stack entry, in input
+// order. A word consisting of the apostrophe alone pushes nothing.
+// Clobbers r0-r2, r4 and flags.
+push_str:
+ mov r0, r10
+ mov r1, r11
+ add r1, r0, r1 // r1 is the end position: word start + word length
+ add r0, #1 // skip the apostrophe
+ mov r4, r8
+ b 2f // check the bound before the first load
+1: ldrb r2, [r4, r0] // load character
+ push {r2} // push to stack
+ add r0, #1 // next character
+2: cmp r0, r1 // compare positions with the end position, not the length
+ blo 1b // repeat while still inside the word
+ bx lr
+
+// push_lit: parse a hexadecimal literal word and push its value.
+// r8  - in, input buffer address
+// r10 - in, position of the word (its first character is the dollar sign)
+// r11 - in, length of the word including the dollar sign
+// Accepts digits 0-9 and lower-case a-f. Each digit shifts the accumulator
+// left by four bits, so digits beyond the eighth push earlier ones out.
+// Any other character, or a word with no digits at all, ends in _err_halt,
+// which never returns.
+// Clobbers r0-r5, r7 and flags.
+push_lit:
+ mov r0, r10 // input position
+ mov r1, r11 // input length
+ mov r4, r8 // input buffer
+ add r5, r0, r1 // calculate end position
+ add r3, r0, #1 // first digit position, just past the dollar sign
+ mov r7, #0 // accumulator
+ cmp r3, r5
+ bhs 4f // no digits inside the word: reject instead of reading past it
+1: ldrb r2, [r4, r3] // load character
+ cmp r2, #'0'
+ blt 4f // not in range
+ cmp r2, #'9'
+ bgt 3f // not a digit, but possibly still a letter
+ // a digit
+ sub r2, #0x30 // '0' is 0x30, so this leaves the digit value
+ b 5f
+
+3: cmp r2, #'a'
+ blt 4f // not in range
+ cmp r2, #'f'
+ bgt 4f // not in range
+ // a letter a-f
+ sub r2, #0x57 // 'a' is 0x61, so this leaves 10..15
+ b 5f
+
+ // error parsing literal; the return address is left on the stack before
+ // parking in the error trap
+4: push {lr}
+ bl _err_halt
+
+5:
+ lsl r7, #4 // shift accumulator
+ orr r7, r2 // or in the value
+ add r3, #1 // add to the counter
+ cmp r3, r5 // are we done?
+ blo 1b // continue while still inside the word
+ push {r7}
+ bx lr
+
+.ltorg
+
+.section .rodata
+// program: the built-in input text, stored as a counted string
+// (length byte followed by the characters).
+.p2align 2
+program:
+ pstring "$0 rep $100 add jump halt"
+
+.section .data
+// input_counter must stay immediately in front of input_buffer: setup_input
+// copies the count byte and the characters of the program as one contiguous
+// run starting at input_counter.
+input_counter:
+ .skip 1
+input_buffer:
+ .skip 255
+// output is read and written with word-sized loads and stores (the head
+// pointer and the dictionary records), so force it onto a 4-byte boundary
+// instead of relying on where the section happens to be placed.
+.align 2
+output: