/compiler.pas
unit Compiler;
interface
uses Opcode, SymTable;
const COMPILE_BUFFER_SIZE: Word = 65535;
type
SegArray = array[0..32767] of Byte;
PSegArray = ^SegArray;
CompileBuffer = object
data: PSegArray;
data_len: Word;
constructor Init;
destructor Done;
(const bytes: Pointer; const len: Word);
(const b: Byte);
(const w: Word);
(const len: Word);
(var f: File);
end;
CompiledObject = record
buffer: CompileBuffer;
name: string;
segment_name: string;
symbols: SymbolTable;
public_symbols: SymbolTable;
end;
OpArray = array[1..3] of Operand;
var currentObj: ^CompiledObject;
(const filename: string; var obj: CompiledObject);
implementation
uses Parse, Util;
type PartsKind = (partsNone, partsInstruction, partsDirective);
Parts = record
label_or_name: string[32];
mnemonic: string[7];
case Kind: PartsKind of
partsInstruction: (
op1: string[32];
op2: string[32];
op3: string[32];
);
partsDirective: (
args: string[128];
)
end;
DataDefType = (DataDB, DataDW, DataDD);
(var pts: Parts);
begin
pts.Kind := partsNone;
pts.label_or_name := '';
pts.mnemonic := '';
pts.op1 := '';
pts.op2 := '';
pts.op3 := '';
pts.args := '';
end;
(const pts: Parts);
begin
if pts.kind = partsNone then
Writeln('=== NONE')
else if pts.kind = partsInstruction then begin
Writeln('=== Instruction label: ', pts.label_or_name, '; ',
'mnemonic: ', pts.mnemonic, '; ',
'op1: ', pts.op1, '; ',
'op2: ', pts.op2, '; ',
'op3: ', pts.op3, ';');
end
else if pts.kind = partsDirective then begin
Writeln('=== Directive label:', pts.label_or_name, '; ',
'directive: ', pts.mnemonic, '; ',
'args: ', pts.args, '; ');
end;
end;
constructor CompileBuffer.Init;
begin
New(data);
end;
destructor CompileBuffer.Done;
begin
Dispose(data);
end;
(const bytes: Pointer; const len: Word);
var i: Integer;
b: ^Byte;
begin
b := bytes;
for i := 0 to len - 1 do begin
data^[data_len + i] := b^;
Inc(b);
end;
data_len := data_len + len;
end;
(const b: Byte);
begin
data^[data_len] := b;
Inc(data_len);
end;
(const w: Word);
begin
data^[data_len] := Lo(w);
Inc(data_len);
data^[data_len] := Hi(w);
Inc(data_len);
end;
(const len: Word);
begin
data_len := data_len + len;
end;
(var f: File);
begin
BlockWrite(f, data, data_len);
end;
(const s: String; var pts: Parts);
var i, j: Byte;
a, b: String[32];
begin
i := 1;
ConsumeWhitespace(s, i);
a := ToUpCase(ConsumeIdentifier(s, i));
if s[i] = ':' then begin { this is a label }
pts.label_or_name := a;
Inc(i);
ConsumeWhitespace(s, i);
if i > Length(s) then
Exit;
a := ToUpCase(ConsumeIdentifier(s, i));
end;
ConsumeWhitespace(s, i);
j := i; { save position }
b := ToUpCase(ConsumeIdentifier(s, j));
{ EQU uniquely and annoyingly has a name before and an arg after }
if b = 'EQU' then begin
pts.Kind := partsDirective;
pts.label_or_name := a;
pts.mnemonic := b;
ConsumeWhitespace(s, j);
pts.args := Copy(s, j, Length(s) - j + 1);
Exit;
end
{ Things which have a name but no other args }
else if (b = 'SEGMENT') or (b = 'STRUC') or (b = 'ENDS') then begin
pts.Kind := partsDirective;
pts.label_or_name := a;
pts.mnemonic := b;
Exit;
end
{ Then everything that is just a directive and arguments }
else if (a = 'DB') or (a = 'DW') or (a = 'DD') or (a = 'DS') or
(a = 'ORG') or (a = 'NAME') or (a = 'PUBLIC') or
(a = 'EXTRN') then begin
pts.Kind := partsDirective;
pts.mnemonic := a;
ConsumeWhitespace(s, i);
pts.args := Copy(s, i, Length(s) - i + 1);
Exit;
end
{ And finally, standalone directives }
else if (a = 'EVEN') or (a = 'END') then begin
pts.Kind := partsDirective;
pts.mnemonic := a;
Exit;
end;
{ Everything left must be an instruction }
pts.Kind := partsInstruction;
pts.mnemonic := a;
ConsumeWhitespace(s, i);
pts.op1 := ConsumeUntilComma(s, i);
Trim(pts.op1);
ConsumeWhitespaceAndCommas(s, i);
if i > Length(s) then
Exit;
pts.op2 := ConsumeUntilComma(s, i);
Trim(pts.op2);
ConsumeWhitespaceAndCommas(s, i);
if i > Length(s) then
Exit;
pts.op3 := ConsumeUntilComma(s, i);
Trim(pts.op3);
end;
(const s: String);
var i: Byte;
n: String[32];
begin
i := 1;
repeat
n := ConsumeUntilComma(s, i);
if Length(n) > 0 then begin
DefineSymbol(currentObj^.public_symbols, ToUpCase(Trim(n)), 0);
ConsumeWhitespaceAndCommas(s, i);
end;
until Length(n) = 0;
end;
(const s: String; width: DataDefType);
var i, j: Byte;
n: Word;
def: String[32];
begin
i := 1;
repeat
def := ConsumeUntilComma(s, i);
if Length(def) > 0 then begin
Trim(def);
if def[1] = '''' then
case width of
dataDB: begin
for j := 2 to Length(def) do begin
if def[j] = '''' then
Break;
currentObj^.buffer.AddByte(Byte(def[j]));
end;
if j < Length(def) then
DieBadOperand('Trailing garbage after terminal '' in string');
end;
DataDW: begin
if Length(def) = 3 then
n := Word(def[2])
else if Length(def) = 4 then
n := (Word(def[2]) shl 8) or Word(def[3])
else
DieBadOperand('Malformed string in DW definition');
currentObj^.buffer.AddWord(n);
end;
DataDD: DieBadOperand('Cannot use strings in DD definition');
end
else begin
if not ParseNumericExpression(def, n) then
DieBadOperand(def);
case width of
DataDB: currentObj^.buffer.AddByte(Lo(n));
DataDW: currentObj^.buffer.AddWord(n);
DataDD: begin
{ Uhhh punting here as I don't really understand what
values are allowed in a DD. This only allows 16-bit
values and pads them to 32-bit. }
currentObj^.buffer.AddWord(n);
currentObj^.buffer.AddWord(0);
end;
end;
end;
Inc(i); { skip comma }
ConsumeWhitespace(s, i);
end;
until Length(def) = 0;
end;
(const pts: Parts; var m: Mnemonic; var op: OpArray; var ie: PInstructionEncoding);
var i, lo_enc, hi_enc: Integer;
begin
m := ParseMnemonic(pts.mnemonic);
if m = mInvalid then begin
ErrorAtLine('Invalid mnemonic: ' + pts.mnemonic);
Halt(1);
end;
GetMnemonicEncodingBounds(m, lo_enc, hi_enc);
if lo_enc = High(IEnc) then begin
ErrorAtLine('Internal error: mnemonic not found');
Halt(128);
end;
{ iterate over possible encodings }
for i := lo_enc to hi_enc do begin
op[1].Kind := OpAbsent;
op[2].Kind := OpAbsent;
op[3].Kind := OpAbsent;
ie := @IEnc[i];
{ Writeln(' ie ', i, ': ', m, ' ', ie^.o1, ' ', ie^.o2, ' ', ie^.o3); }
if (ie^.o1 <> OpAbsent) and not ParseOperand(pts.op1, op[1], ie^.o1) then
Continue;
if (ie^.o2 <> OpAbsent) and not ParseOperand(pts.op2, op[2], ie^.o2) then
Continue;
if (ie^.o3 <> OpAbsent) and not ParseOperand(pts.op3, op[3], ie^.o3) then
Continue;
Exit;
end;
{ We didn't find anything }
ErrorAtLine('Invalid operands for ' + MnemonicStr(m));
Writeln('Possible valid options:');
for i := lo_enc to hi_enc do begin
Writeln(' ', MnemonicStr(m), ' ', ShortOperandStr(IEnc[i].o1),
' ', ShortOperandStr(IEnc[i].o2),
' ', ShortOperandStr(IEnc[i].o3));
end;
Halt(1);
end;
(const pts: Parts);
var i: Integer;
instr: array[0..5] of Byte;
instr_len: Byte;
m: Mnemonic;
op: OpArray;
ie: PInstructionEncoding;
rbp: Byte;
disp: Integer;
begin
instr_len := 2;
if pts.mnemonic = '' then
Exit;
FindInstruction(pts, m, op, ie);
instr[1] := Byte(ie^.op);
instr[0] := Byte(ie^.w);
if ie^.wb <> wLocation then
instr[0] := instr[0] OR (Byte(ie^.wb) SHL 3);
if ((m = mMOV) or (m = mMOVB)) AND (ie^.o1 = OpMemory) AND (ie^.o2 = OpMemory) then begin
{ mem-mem MOV is a special case that emits two instructions back-to-back. Note that the
source (the second operand) is first. }
instr[1] := instr[1] OR Byte(op[2].mm);
instr[0] := instr[0] OR (Byte(op[2].aa) SHL 1);
if op[2].aa = BaseOffset then begin
instr[2] := Lo(op[2].offset);
Inc(instr_len);
end;
currentObj^.buffer.Add(@instr, instr_len);
instr_len := 2;
instr[1] := $CC OR Byte(op[1].mm);
instr[0] := Byte(ie^.w) OR (Byte(op[1].aa) SHL 1);
if op[1].aa = BaseOffset then begin
instr[2] := Lo(op[1].offset);
Inc(instr_len);
end;
currentObj^.buffer.Add(@instr, instr_len);
Exit;
end
else begin
for i := 1 to 3 do begin
case op[i].Kind of
OpMemory: begin
instr[1] := instr[1] OR Byte(op[i].mm);
instr[0] := instr[0] OR (Byte(op[i].aa) SHL 1);
if op[i].aa = BaseOffset then begin
instr[2] := Lo(op[i].offset);
Inc(instr_len);
end;
end;
OpRegister:
rbp := Byte(op[i].reg);
OpPointer:
rbp := Byte(op[i].preg);
OpImmediate: begin
case ie^.wb of
wOne: begin
if op[i].immed > 255 then
WarnAtLine('byte immediate is > 255');
instr[instr_len] := Lo(op[i].immed);
Inc(instr_len);
end;
wTwo: begin
instr[instr_len] := Lo(op[i].immed);
Inc(instr_len);
instr[instr_len] := Hi(op[i].immed);
Inc(instr_len);
end;
end;
end;
OpLocation: begin
{ The displacement is added to TP at the time of execution, which annoyingly
is already advanced to the instruction after the jump. So where we calculate
from depends on whether it's a one-byte or two-byte displacement, and we
don't know whether we need a two-byte displacement until we calculate the
displacement. :|
I *think* this errs on the side of making a two-byte displacement for a case
that would only need one, and not the other way around. But the proof will be
in the testing. }
disp := Integer(op[i].loc) - Integer(currentObj^.buffer.data_len + instr_len + 1);
if (ie^.wb = wTwo) OR (disp > 128) OR (disp < -127) then begin
Dec(disp); { displacement origin moves one byte forward with two-byte displacement }
instr[instr_len] := Lo(Word(disp));
Inc(instr_len);
instr[instr_len] := Hi(Word(disp));
Inc(instr_len);
{ Not totally sure if it's necessary to also adjust W, but here it is }
instr[0] := instr[0] OR (Byte(wTwo) SHL 3) OR 1;
end
else begin
instr[instr_len] := Byte(Shortint(disp));
Inc(instr_len);
instr[0] := instr[0] OR (Byte(wOne) SHL 3);
end;
end;
OpBit:
rbp := Byte(op[i].bit);
OpWidth:
if op[i].width = widByte then
rbp := 0
else
rbp := 1;
end;
end;
end;
case ie^.rbp of
rbpReg, rbpBit, rbpPtrReg: instr[0] := instr[0] OR (rbp SHL 5);
{ rbpNone does nothing }
rbpHlt: instr[0] := instr[0] OR $20;
rbpSintr: instr[0] := instr[0] OR $40;
rbpJmp: instr[0] := instr[0] OR $80;
rbpWid: instr[0] := instr[0] OR (1 SHL 7)
OR (Byte(op[1].width) SHL 6)
OR (Byte(op[2].width) SHL 5);
rbpXfer: instr[0] := instr[0] OR $60;
end;
currentObj^.buffer.Add(@instr, instr_len);
{ Write('mnemonic: ', m, '; op1: ');
DumpOperand(op[1]);
Write('; op2: ');
DumpOperand(op[2]);
Writeln(''); }
end;
(const pts: Parts);
var op: Operand;
begin
if pts.mnemonic = 'EQU' then begin
if not ParseOperand(pts.args, op, OpImmediate) then
DieBadOperand(pts.args);
DefineSymbol(currentObj^.symbols, pts.label_or_name, op.immed);
end
else if pts.mnemonic = 'DB' then begin
DefineData(pts.args, DataDB);
end
else if pts.mnemonic = 'DW' then begin
DefineData(pts.args, DataDW);
end
else if pts.mnemonic = 'DD' then
begin
DefineData(pts.args, DataDD);
end
else if pts.mnemonic = 'DS' then begin
if not ParseOperand(pts.args, op, OpImmediate) then
DieBadOperand(pts.args);
currentObj^.buffer.Increment(op.immed);
end
else if pts.mnemonic = 'NAME' then begin
currentObj^.name := pts.op1;
end
else if pts.mnemonic = 'SEGMENT' then begin
if currentObj^.segment_name = '' then
currentObj^.segment_name := pts.label_or_name
else
DieBadOperand('Segment has already been defined');
end
else if pts.mnemonic = 'ENDS' then begin
if pts.label_or_name <> currentObj^.segment_name then
DieBadOperand('Segment end name is not the same as the beginning');
{ TODO: make sure no instructions appear outside of the segment bounds }
end
else if pts.mnemonic = 'PUBLIC' then begin
AddPublic(pts.args);
end
else if pts.mnemonic = 'EXTRN' then begin
ErrorAtLine('EXTRN not yet implemented');
Halt(1);
end
else if pts.mnemonic = 'END' then begin
{ Not sure what to do with this }
end;
end;
(const filename: string; var obj: CompiledObject);
var f: Text;
pts: Parts;
begin
currentObj := @obj; { poor man's `this` pointer }
obj.buffer.Init;
Assign(f, filename);
Reset(f);
currentLine := 0;
while not Eof(f) do begin
Readln(f, linebuf);
Inc(currentLine);
{ Writeln('*** ', linebuf); }
StripComments(linebuf);
if Length(linebuf) = 0 then { nothing left on the line }
Continue;
PartsClear(pts);
PickParts(linebuf, pts);
{ PartsDump(pts); }
if (pts.label_or_name <> '') and
not ((pts.mnemonic = 'EQU') or (pts.mnemonic = 'STRUC') or
(pts.mnemonic = 'SEGMENT') or (pts.mnemonic = 'ENDS')) then
DefineSymbol(obj.symbols, pts.label_or_name, obj.buffer.data_len);
case pts.Kind of
partsDirective: CompileDirective(pts);
partsInstruction: CompileInstruction(pts);
end;
end;
{ DumpSymbols; }
end;
end.