325 lines
11 KiB
C
325 lines
11 KiB
C
#include "vslc.h"
|
|
|
|
// This header defines a bunch of macros we can use to emit assembly to stdout
|
|
#include "emit.h"
|
|
|
|
// In the System V calling convention, the first 6 integer parameters are passed in registers
|
|
#define NUM_REGISTER_PARAMS 6
|
|
static const char* REGISTER_PARAMS[6] = {RDI, RSI, RDX, RCX, R8, R9};
|
|
|
|
// Takes in a symbol of type SYMBOL_FUNCTION, and returns how many parameters the function takes
|
|
#define FUNC_PARAM_COUNT(func) ((func)->node->children[1]->n_children)
|
|
|
|
static void generate_stringtable(void);
|
|
static void generate_global_variables(void);
|
|
static void generate_function(symbol_t* function);
|
|
static void generate_expression(node_t* expression);
|
|
static void generate_statement(node_t* node);
|
|
static void generate_main(symbol_t* first);
|
|
|
|
// Entry point for code generation
|
|
void generate_program(void)
|
|
{
|
|
generate_stringtable();
|
|
generate_global_variables();
|
|
|
|
// This directive announces that the following assembly belongs to the .text section,
|
|
// which is the section where all executable assembly lives
|
|
DIRECTIVE(".text");
|
|
|
|
// TODO (Task 3):
|
|
// For each function in global_symbols, generate it using generate_function ()
|
|
bool found = false;
|
|
symbol_t *entry;
|
|
for (int i = 0; i < global_symbols->n_children; i++) {
|
|
symbol_t *sym = global_symbols->children[i];
|
|
if (sym->type == SYMBOL_FUNCTION) {
|
|
if (!found) {
|
|
entry = sym;
|
|
found = true;
|
|
}
|
|
generate_function(sym);
|
|
}
|
|
}
|
|
|
|
// In VSL, the topmost function in a program is its entry point.
|
|
// We want to be able to take parameters from the command line,
|
|
// and have them be sent into the entry point function.
|
|
//
|
|
// Due to the fact that parameters are all passed as strings,
|
|
// and passed as the (argc, argv)-pair, we need to make a wrapper for our entry function.
|
|
// This wrapper handles string -> int64_t conversion, and is already implemented.
|
|
// call generate_main ( <entry point function symbol> );
|
|
generate_main(entry);
|
|
}
|
|
|
|
// Prints one .asciz entry for each string in the global string_list
|
|
static void generate_stringtable(void)
|
|
{
|
|
// This section is where read-only string data is stored
|
|
// It is called .rodata on Linux, and "__TEXT, __cstring" on macOS
|
|
DIRECTIVE(".section %s", ASM_STRING_SECTION);
|
|
|
|
// These strings are used by printf
|
|
DIRECTIVE("intout: .asciz \"%s\"", "%ld");
|
|
DIRECTIVE("strout: .asciz \"%s\"", "%s");
|
|
// This string is used by the entry point-wrapper
|
|
DIRECTIVE("errout: .asciz \"%s\"", "Wrong number of arguments");
|
|
|
|
// TODO (Task 1): Print all strings in the program here, with labels you can refer to later
|
|
// You have access to the global variables string_list and string_list_len from symbols.c
|
|
for (int i = 0; i < string_list_len; i++) {
|
|
DIRECTIVE("string%d:\t.asciz \"%s\"", i, string_list[i]);
|
|
}
|
|
}
|
|
|
|
// Prints .zero entries in the .bss section to allocate room for global variables and arrays
|
|
static void generate_global_variables(void)
|
|
{
|
|
// This section is where zero-initialized global variables lives
|
|
// It is called .bss on linux, and "__DATA, __bss" on macOS
|
|
DIRECTIVE(".section %s", ASM_BSS_SECTION);
|
|
DIRECTIVE(".align 8");
|
|
|
|
// TODO (Task 2): Fill this section with all global variables and global arrays
|
|
// Give each a label you can find later, and the appropriate size.
|
|
// Regular variables are 8 bytes, while arrays are 8 bytes per element.
|
|
// Remember to mangle the name in some way, to avoid collisions with labels
|
|
// (for example, put a '.' in front of the symbol name)
|
|
|
|
// As an example, to set aside 16 bytes and label it .myBytes, write:
|
|
// DIRECTIVE(".myBytes: .zero 16")
|
|
for (int i = 0; i < global_symbols->n_children; i++) {
|
|
symbol_t *sym = global_symbols->children[i];
|
|
switch (sym->type) {
|
|
case SYMBOL_GLOBAL_VAR:
|
|
DIRECTIVE(".%s:\t.zero 8", sym.name);
|
|
break;
|
|
case SYMBOL_GLOBAL_ARRAY:
|
|
node_t num = sym->node->children[1];
|
|
assert(num->type == NUMBER_LITERAL);
|
|
int len = num->data.number_literal * 8;
|
|
DIRECTIVE(".%s:\t.zero %d", sym.name, len);
|
|
break;
|
|
case SYMBOL_FUNCTION:
|
|
break;
|
|
default:
|
|
fprintf(stderr, "unexpected global symbol");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Global variable used to make the function currently being generated accessible from anywhere
|
|
static symbol_t* current_function;
|
|
|
|
// Prints the entry point. preamble, statements and epilouge of the given function
|
|
static void generate_function(symbol_t* function)
|
|
{
|
|
// TODO (Task 3)
|
|
|
|
// TODO (Task 3.1): Do the prologue, including call frame building and parameter pushing
|
|
// Tip: use the definitions REGISTER_PARAMS and NUM_REGISTER_PARAMS at the top of this file
|
|
LABEL(".%s", function->name);
|
|
|
|
PUSHQ(RBP);
|
|
MOVQ(RSP, RBP);
|
|
|
|
symbol_table_t *symtable = function->function_symtable;
|
|
for (int i = 0; i < symtable->n_symbols; i++) {
|
|
symbol_t *sym = symtable->children[i];
|
|
switch (sym->type) {
|
|
case SYMBOL_PARAMETER:
|
|
if (i < NUM_REGISTER_PARAMS) {
|
|
PUSHQ(REGISTER_PARAMS[i]);
|
|
} else {
|
|
/* these params are already on the stack */;
|
|
}
|
|
break;
|
|
case SYMBOL_LOCAL_VAR:
|
|
PUSHQ("$0");
|
|
break;
|
|
default:
|
|
fprintf(stderr, "unexpected symbol in symtable");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
// TODO (Task 4): the function body can be sent to generate_statement()
|
|
generate_statement(function->node);
|
|
|
|
// TODO (Task 3.2): Emit the epilogue, including a label and a default return value (0)
|
|
LABEL(".%s.epilogue", function->name);
|
|
MOVQ(RBP, RSP);
|
|
POPQ(RBP);
|
|
RET;
|
|
}
|
|
|
|
// Generates code for a function call, which can either be a statement or an expression
|
|
static void generate_function_call(node_t* call)
|
|
{
|
|
// TODO (Task 4.3)
|
|
}
|
|
|
|
// Generates code to evaluate the expression, and place the result in %rax
|
|
static void generate_expression(node_t* expression)
|
|
{
|
|
// TODO (Task 4.1): Generate code for evaluating the given expression.
|
|
// (The candidates are NUMBER_LITERAL, IDENTIFIER, ARRAY_INDEXING, OPERATOR and FUNCTION_CALL)
|
|
switch (expression->type) {
|
|
case NUMBER_LITERAL:
|
|
|
|
break;
|
|
case IDENTIFIER:
|
|
break;
|
|
case ARRAY_INDEXING:
|
|
break;
|
|
case OPERATOR:
|
|
break;
|
|
case FUNCTION_CALL:
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Meant to generate code for an assignment statement. Currently a stub: nothing is emitted.
static void generate_assignment_statement(node_t* statement)
{
    // TODO (Task 4.2):
    // You can assign to both local variables, global variables and function parameters.
    // Use the IDENTIFIER's symbol to find out what kind of symbol you are assigning to.
    // The left hand side of an assignment statement may also be an ARRAY_INDEXING node.
    // In that case, you must also emit code for evaluating the index being stored to
    (void)statement; // not used until the task is implemented
}
|
|
|
|
// Meant to generate code for a print statement. Currently a stub: nothing is emitted.
static void generate_print_statement(node_t* statement)
{
    // TODO (Task 4.4):
    // Remember to call safe_printf instead of printf
    (void)statement; // not used until the task is implemented
}
|
|
|
|
// Meant to generate code for a return statement. Currently a stub: nothing is emitted.
static void generate_return_statement(node_t* statement)
{
    // TODO (Task 4.5): Evaluate the return value, store it in %rax and jump to the function epilogue
    (void)statement; // not used until the task is implemented
}
|
|
|
|
// Recursively generate the given statement node, and all sub-statements.
|
|
static void generate_statement(node_t* node)
|
|
{
|
|
if (node == NULL)
|
|
return;
|
|
|
|
// TODO (Task 4): Generate instructions for statements.
|
|
// The statements you must handle are BLOCK, ASSIGNMENT_STATEMENT,
|
|
// PRINT_STATEMENT, RETURN_STATEMENT and FUNCTION_CALL.
|
|
// Statements of type LOCAL_VARIABLE should be ignored.
|
|
for (int i = 0; i < node->n_children; i++) {
|
|
node_t *child = node->children[i];
|
|
switch (child->type) {
|
|
case BLOCK:
|
|
node_t list = child->children[0];
|
|
for (int j = 0; j < list->n_children; j++)
|
|
generate_statement(list->children[j]);
|
|
break;
|
|
case ASSIGNMENT_STATEMENT:
|
|
generate_assignment_statement(child);
|
|
break;
|
|
case PRINT_STATEMENT:
|
|
generate_print_statement(child);
|
|
break;
|
|
case RETURN_STATEMENT:
|
|
generate_return_statement(child);
|
|
break;
|
|
case FUNCTION_CALL:
|
|
generate_function_call(child);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void generate_safe_printf(void)
|
|
{
|
|
LABEL("safe_printf");
|
|
|
|
PUSHQ(RBP);
|
|
MOVQ(RSP, RBP);
|
|
// This is a bitmask that abuses how negative numbers work, to clear the last 4 bits
|
|
// A stack pointer that is not 16-byte aligned, will be moved down to a 16-byte boundary
|
|
ANDQ("$-16", RSP);
|
|
EMIT("call printf");
|
|
// Cleanup the stack back to how it was
|
|
MOVQ(RBP, RSP);
|
|
POPQ(RBP);
|
|
RET;
|
|
}
|
|
|
|
// Generates the scaffolding for parsing integers from the command line, and passing them to the
|
|
// entry point of the VSL program. The VSL entry function is specified using the parameter "first".
|
|
static void generate_main(symbol_t* first)
|
|
{
|
|
// Make the globally available main function
|
|
LABEL("main");
|
|
|
|
// Save old base pointer, and set new base pointer
|
|
PUSHQ(RBP);
|
|
MOVQ(RSP, RBP);
|
|
|
|
// Which registers argc and argv are passed in
|
|
const char* argc = RDI;
|
|
const char* argv = RSI;
|
|
|
|
const size_t expected_args = FUNC_PARAM_COUNT(first);
|
|
|
|
SUBQ("$1", argc); // argc counts the name of the binary, so subtract that
|
|
EMIT("cmpq $%ld, %s", expected_args, argc);
|
|
JNE("ABORT"); // If the provdied number of arguments is not equal, go to the abort label
|
|
|
|
if (expected_args == 0)
|
|
goto skip_args; // No need to parse argv
|
|
|
|
// Now we emit a loop to parse all parameters, and push them to the stack,
|
|
// in right-to-left order
|
|
|
|
// First move the argv pointer to the vert rightmost parameter
|
|
EMIT("addq $%ld, %s", expected_args * 8, argv);
|
|
|
|
// We use rcx as a counter, starting at the number of arguments
|
|
MOVQ(argc, RCX);
|
|
LABEL("PARSE_ARGV"); // A loop to parse all parameters
|
|
PUSHQ(argv); // push registers to caller save them
|
|
PUSHQ(RCX);
|
|
|
|
// Now call strtol to parse the argument
|
|
EMIT("movq (%s), %s", argv, RDI); // 1st argument, the char *
|
|
MOVQ("$0", RSI); // 2nd argument, a null pointer
|
|
MOVQ("$10", RDX); // 3rd argument, we want base 10
|
|
EMIT("call strtol");
|
|
|
|
// Restore caller saved registers
|
|
POPQ(RCX);
|
|
POPQ(argv);
|
|
PUSHQ(RAX); // Store the parsed argument on the stack
|
|
|
|
SUBQ("$8", argv); // Point to the previous char*
|
|
EMIT("loop PARSE_ARGV"); // Loop uses RCX as a counter automatically
|
|
|
|
// Now, pop up to 6 arguments into registers instead of stack
|
|
for (size_t i = 0; i < expected_args && i < NUM_REGISTER_PARAMS; i++)
|
|
POPQ(REGISTER_PARAMS[i]);
|
|
|
|
skip_args:
|
|
|
|
EMIT("call .%s", first->name);
|
|
MOVQ(RAX, RDI); // Move the return value of the function into RDI
|
|
EMIT("call exit"); // Exit with the return value as exit code
|
|
|
|
LABEL("ABORT"); // In case of incorrect number of arguments
|
|
EMIT("leaq errout(%s), %s", RIP, RDI);
|
|
EMIT("call puts"); // print the errout string
|
|
MOVQ("$1", RDI);
|
|
EMIT("call exit"); // Exit with return code 1
|
|
|
|
generate_safe_printf();
|
|
|
|
// Declares global symbols we use or emit, such as main and printf
|
|
DIRECTIVE("%s", ASM_DECLARE_SYMBOLS);
|
|
}
|