Files
TDT4205/ps5/src/generator.c
2026-03-29 21:36:15 +02:00

325 lines
11 KiB
C

#include "vslc.h"
// This header defines a bunch of macros we can use to emit assembly to stdout
#include "emit.h"
// In the System V calling convention, the first 6 integer parameters are passed in registers
#define NUM_REGISTER_PARAMS 6

// The registers used for those parameters, in parameter order.
// The array is sized by NUM_REGISTER_PARAMS so the table and the count cannot drift apart.
static const char* REGISTER_PARAMS[NUM_REGISTER_PARAMS] = {RDI, RSI, RDX, RCX, R8, R9};

// Takes in a symbol of type SYMBOL_FUNCTION, and returns how many parameters the function takes
#define FUNC_PARAM_COUNT(func) ((func)->node->children[1]->n_children)

// Forward declarations of the generator routines defined further down in this file
static void generate_stringtable(void);
static void generate_global_variables(void);
static void generate_function(symbol_t* function);
static void generate_expression(node_t* expression);
static void generate_statement(node_t* node);
static void generate_main(symbol_t* first);
// Entry point for code generation.
//
// Emits, in order: the string table, the global variable section, the .text section
// with one routine per function symbol, and finally the main wrapper that calls the
// program's entry point. Exits with EXIT_FAILURE if the program defines no function,
// since there is then nothing for the wrapper to call.
void generate_program(void)
{
    generate_stringtable();
    generate_global_variables();

    // This directive announces that the following assembly belongs to the .text section,
    // which is the section where all executable assembly lives
    DIRECTIVE(".text");

    // In VSL, the topmost function in a program is its entry point, so remember the
    // first function symbol we come across while generating all of them.
    // NOTE(review): the symbol table is accessed through `symbols`/`n_symbols`, matching
    // the `n_symbols` field used on function symbol tables in this file — confirm
    // against the symbol table header.
    symbol_t* entry = NULL;
    for (size_t i = 0; i < global_symbols->n_symbols; i++) {
        symbol_t* sym = global_symbols->symbols[i];
        if (sym->type != SYMBOL_FUNCTION)
            continue;
        if (entry == NULL)
            entry = sym;
        generate_function(sym);
    }

    // Without this check an uninitialized pointer would be handed to generate_main()
    if (entry == NULL) {
        fprintf(stderr, "error: the program defines no function to use as entry point\n");
        exit(EXIT_FAILURE);
    }

    // We want to be able to take parameters from the command line,
    // and have them be sent into the entry point function.
    //
    // Due to the fact that parameters are all passed as strings,
    // and passed as the (argc, argv)-pair, we need a wrapper for our entry function.
    // This wrapper handles string -> int64_t conversion, and is emitted by generate_main().
    generate_main(entry);
}
// Emits the read-only string section: the fixed strings used by the runtime scaffolding,
// followed by one labelled .asciz entry for each string in the global string_list
static void generate_stringtable(void)
{
    // Read-only string data lives in .rodata on Linux, and "__TEXT, __cstring" on macOS
    DIRECTIVE(".section %s", ASM_STRING_SECTION);

    // Format strings used by printf
    DIRECTIVE("intout: .asciz \"%s\"", "%ld");
    DIRECTIVE("strout: .asciz \"%s\"", "%s");

    // Message used by the entry point wrapper
    DIRECTIVE("errout: .asciz \"%s\"", "Wrong number of arguments");

    // Every string of the program is labelled string<index>, where <index> is its
    // position in string_list, so it can be referred to later
    int index = 0;
    while (index < string_list_len) {
        DIRECTIVE("string%d:\t.asciz \"%s\"", index, string_list[index]);
        index++;
    }
}
// Prints .zero entries in the .bss section to allocate room for global variables and arrays
static void generate_global_variables(void)
{
// This section is where zero-initialized global variables lives
// It is called .bss on linux, and "__DATA, __bss" on macOS
DIRECTIVE(".section %s", ASM_BSS_SECTION);
DIRECTIVE(".align 8");
// TODO (Task 2): Fill this section with all global variables and global arrays
// Give each a label you can find later, and the appropriate size.
// Regular variables are 8 bytes, while arrays are 8 bytes per element.
// Remember to mangle the name in some way, to avoid collisions with labels
// (for example, put a '.' in front of the symbol name)
// As an example, to set aside 16 bytes and label it .myBytes, write:
// DIRECTIVE(".myBytes: .zero 16")
for (int i = 0; i < global_symbols->n_children; i++) {
symbol_t *sym = global_symbols->children[i];
switch (sym->type) {
case SYMBOL_GLOBAL_VAR:
DIRECTIVE(".%s:\t.zero 8", sym.name);
break;
case SYMBOL_GLOBAL_ARRAY:
node_t num = sym->node->children[1];
assert(num->type == NUMBER_LITERAL);
int len = num->data.number_literal * 8;
DIRECTIVE(".%s:\t.zero %d", sym.name, len);
break;
case SYMBOL_FUNCTION:
break;
default:
fprintf(stderr, "unexpected global symbol");
exit(EXIT_FAILURE);
}
}
}
// Global variable used to make the function currently being generated accessible from anywhere
static symbol_t* current_function;

// Prints the entry label, prologue, statements and epilogue of the given function.
//
// Prologue: builds the call frame, then pushes one 8-byte slot per symbol in the
// function's symbol table: register-passed parameters are pushed from their register,
// local variables are pushed as 0. Parameters beyond the register-passed ones are not
// pushed, as the caller has already placed them on the stack.
// Epilogue: labelled ".<name>.epilogue"; control that falls off the end of the body
// reaches it with %rax set to the default return value 0.
static void generate_function(symbol_t* function)
{
    // Record which function is being generated before any of its code is emitted
    current_function = function;

    LABEL(".%s", function->name);
    PUSHQ(RBP);
    MOVQ(RSP, RBP);

    // NOTE(review): the entries are read through `symbols`, the counterpart of the
    // `n_symbols` count used here — confirm against the symbol table header.
    symbol_table_t* symtable = function->function_symtable;
    for (size_t i = 0; i < symtable->n_symbols; i++) {
        symbol_t* sym = symtable->symbols[i];
        switch (sym->type) {
        case SYMBOL_PARAMETER:
            // The position in the symbol table is used as the parameter number
            if (i < NUM_REGISTER_PARAMS) {
                PUSHQ(REGISTER_PARAMS[i]);
            } else {
                /* these params are already on the stack */;
            }
            break;
        case SYMBOL_LOCAL_VAR:
            PUSHQ("$0");
            break;
        default:
            fprintf(stderr, "unexpected symbol in symtable");
            exit(EXIT_FAILURE);
        }
    }

    // generate_statement() walks the children of the node it is given,
    // which is how the body block of the function node is reached
    generate_statement(function->node);

    // Default return value, used when the body ends without jumping to the epilogue.
    // It is placed before the label so such a jump keeps whatever %rax already holds.
    MOVQ("$0", RAX);

    LABEL(".%s.epilogue", function->name);
    MOVQ(RBP, RSP);
    POPQ(RBP);
    RET;
}
// Generates code for a function call, which can either be a statement or an expression.
// NOTE: not implemented yet. The body is empty, so calling this emits no assembly.
static void generate_function_call(node_t* call)
{
// TODO (Task 4.3)
}
// Generates code to evaluate the expression, and place the result in %rax.
//
// NOTE: code generation for the individual expression types is still TODO; every
// known type is currently accepted without emitting anything. A node type that is not
// an expression aborts with EXIT_FAILURE, like the other switches in this file,
// instead of silently producing no code.
static void generate_expression(node_t* expression)
{
    // TODO (Task 4.1): Generate code for evaluating the given expression.
    // (The candidates are NUMBER_LITERAL, IDENTIFIER, ARRAY_INDEXING, OPERATOR and FUNCTION_CALL)
    switch (expression->type) {
    case NUMBER_LITERAL:
        break;
    case IDENTIFIER:
        break;
    case ARRAY_INDEXING:
        break;
    case OPERATOR:
        break;
    case FUNCTION_CALL:
        break;
    default:
        fprintf(stderr, "unexpected node type in expression\n");
        exit(EXIT_FAILURE);
    }
}
// Generates code for an assignment statement.
// NOTE: not implemented yet. The body is empty, so calling this emits no assembly.
static void generate_assignment_statement(node_t* statement)
{
// TODO (Task 4.2):
// You can assign to both local variables, global variables and function parameters.
// Use the IDENTIFIER's symbol to find out what kind of symbol you are assigning to.
// The left hand side of an assignment statement may also be an ARRAY_INDEXING node.
// In that case, you must also emit code for evaluating the index being stored to
}
// Generates code for a print statement.
// NOTE: not implemented yet. The body is empty, so calling this emits no assembly.
static void generate_print_statement(node_t* statement)
{
// TODO (Task 4.4):
// Remember to call safe_printf instead of printf
}
// Generates code for a return statement.
// NOTE: not implemented yet. The body is empty, so calling this emits no assembly.
static void generate_return_statement(node_t* statement)
{
// TODO (Task 4.5): Evaluate the return value, store it in %rax and jump to the function epilogue
}
// Generates every statement that is a direct child of the given node, recursing
// into blocks.
//
// Each child is dispatched on its own node type:
//   BLOCK                 -> the statements in the block's statement list
//   ASSIGNMENT_STATEMENT  -> generate_assignment_statement()
//   PRINT_STATEMENT       -> generate_print_statement()
//   RETURN_STATEMENT      -> generate_return_statement()
//   FUNCTION_CALL         -> generate_function_call()
// Children of any other type (such as LOCAL_VARIABLE) are ignored.
// A NULL node is accepted and generates nothing.
static void generate_statement(node_t* node)
{
    if (node == NULL)
        return;

    for (size_t i = 0; i < node->n_children; i++) {
        node_t* child = node->children[i];
        if (child == NULL)
            continue;

        switch (child->type) {
        case BLOCK:
            // The first child of a block is treated as its list of statements.
            // Recurse on the list itself, so that each statement is dispatched on its
            // own type. Recursing on each statement instead would dispatch on the
            // statement's operands, and the statement would never be generated.
            if (child->n_children > 0)
                generate_statement(child->children[0]);
            break;
        case ASSIGNMENT_STATEMENT:
            generate_assignment_statement(child);
            break;
        case PRINT_STATEMENT:
            generate_print_statement(child);
            break;
        case RETURN_STATEMENT:
            generate_return_statement(child);
            break;
        case FUNCTION_CALL:
            generate_function_call(child);
            break;
        default:
            // Not a statement that produces code here
            break;
        }
    }
}
// Emits the safe_printf routine: a wrapper that sets up its own frame, forces the
// stack pointer onto a 16-byte boundary, calls printf, and then restores the stack
// pointer and base pointer before returning.
static void generate_safe_printf(void)
{
LABEL("safe_printf");
// Save the old base pointer and remember the current stack pointer in it
PUSHQ(RBP);
MOVQ(RSP, RBP);
// This is a bitmask that abuses how negative numbers work, to clear the last 4 bits
// A stack pointer that is not 16-byte aligned will be moved down to a 16-byte boundary
ANDQ("$-16", RSP);
EMIT("call printf");
// Clean up the stack back to how it was, using the stack pointer saved in the base pointer
MOVQ(RBP, RSP);
POPQ(RBP);
RET;
}
// Generates the scaffolding for parsing integers from the command line, and passing them to the
// entry point of the VSL program. The VSL entry function is specified using the parameter "first".
//
// The emitted main:
//   1. aborts (prints errout, exits with code 1) unless argc - 1 equals the number of
//      parameters of the entry function,
//   2. parses every argument with strtol (base 10), pushing the results right-to-left,
//   3. pops up to NUM_REGISTER_PARAMS of them into the parameter registers,
//   4. calls the entry function and exits with its return value as exit code.
// Also emits safe_printf and the global symbol declarations.
static void generate_main(symbol_t* first)
{
    // Make the globally available main function
    LABEL("main");

    // Save old base pointer, and set new base pointer
    PUSHQ(RBP);
    MOVQ(RSP, RBP);

    // Which registers argc and argv are passed in
    const char* argc = RDI;
    const char* argv = RSI;

    const size_t expected_args = FUNC_PARAM_COUNT(first);

    SUBQ("$1", argc); // argc counts the name of the binary, so subtract that
    // %zu is the conversion specifier that matches size_t
    EMIT("cmpq $%zu, %s", expected_args, argc);
    JNE("ABORT"); // If the provided number of arguments is not equal, go to the abort label

    if (expected_args == 0)
        goto skip_args; // No need to parse argv

    // Now we emit a loop to parse all parameters, and push them to the stack,
    // in right-to-left order

    // First move the argv pointer to the very rightmost parameter
    EMIT("addq $%zu, %s", expected_args * 8, argv);

    // We use rcx as a counter, starting at the number of arguments
    MOVQ(argc, RCX);
    LABEL("PARSE_ARGV"); // A loop to parse all parameters
    PUSHQ(argv);         // push registers to caller save them
    PUSHQ(RCX);

    // Now call strtol to parse the argument
    EMIT("movq (%s), %s", argv, RDI); // 1st argument, the char *
    MOVQ("$0", RSI);                  // 2nd argument, a null pointer
    MOVQ("$10", RDX);                 // 3rd argument, we want base 10
    EMIT("call strtol");

    // Restore caller saved registers
    POPQ(RCX);
    POPQ(argv);
    PUSHQ(RAX); // Store the parsed argument on the stack

    SUBQ("$8", argv);        // Point to the previous char*
    EMIT("loop PARSE_ARGV"); // Loop uses RCX as a counter automatically

    // Now, pop up to 6 arguments into registers instead of stack
    for (size_t i = 0; i < expected_args && i < NUM_REGISTER_PARAMS; i++)
        POPQ(REGISTER_PARAMS[i]);

skip_args:

    EMIT("call .%s", first->name);
    MOVQ(RAX, RDI);    // Move the return value of the function into RDI
    EMIT("call exit"); // Exit with the return value as exit code

    LABEL("ABORT"); // In case of incorrect number of arguments
    EMIT("leaq errout(%s), %s", RIP, RDI);
    EMIT("call puts"); // print the errout string
    MOVQ("$1", RDI);
    EMIT("call exit"); // Exit with return code 1

    generate_safe_printf();

    // Declares global symbols we use or emit, such as main and printf
    DIRECTIVE("%s", ASM_DECLARE_SYMBOLS);
}