diff --git a/ps3/.gitignore b/ps3/.gitignore new file mode 100644 index 0000000..45e04c4 --- /dev/null +++ b/ps3/.gitignore @@ -0,0 +1,13 @@ +.DS_Store +.idea +*.log +tmp/ + +build/ + +*.ast +*.svg +*.symbols +*.S +*.out +!vsl_programs/*/suggested/* diff --git a/ps3/CMakeLists.txt b/ps3/CMakeLists.txt new file mode 100644 index 0000000..7bb574c --- /dev/null +++ b/ps3/CMakeLists.txt @@ -0,0 +1,50 @@ +cmake_minimum_required(VERSION 3.21) + +project(vslc VERSION 1.0 LANGUAGES C) + +set(VSLC_SOURCES "src/vslc.c" + "src/tree.c" + "src/graphviz_output.c") + +set(VSLC_LEXER_SOURCE "src/scanner.l") +set(VSLC_PARSER_SOURCE "src/parser.y") + + +# === Setup generation of parser and scanner .c files and support headers +find_package(FLEX 2.6 REQUIRED) +find_package(BISON 3.5 REQUIRED) + +# It is highly recommended to have bison v. 3.8 or later +# This version added the very useful counterexample-feature +if(BISON_VERSION VERSION_GREATER_EQUAL 3.8) + set(BISON_FLAGS -Wcounterexamples) +endif() + +set(GEN_DIR "${CMAKE_CURRENT_BINARY_DIR}") +set(SCANNER_GEN_C "${GEN_DIR}/scanner.c") +set(PARSER_GEN_C "${GEN_DIR}/parser.c") + +flex_target(scanner "${VSLC_LEXER_SOURCE}" "${SCANNER_GEN_C}" DEFINES_FILE "${GEN_DIR}/scanner.h") +bison_target(parser "${VSLC_PARSER_SOURCE}" "${PARSER_GEN_C}" DEFINES_FILE "${GEN_DIR}/parser.h" + COMPILE_FLAGS ${BISON_FLAGS}) +add_flex_bison_dependency(scanner parser) + + +# === Finally declare the compiler target, depending on all .c files in the project === +add_executable(vslc "${VSLC_SOURCES}" "${SCANNER_GEN_C}" "${PARSER_GEN_C}") +# Set some flags specifically for flex/bison +target_include_directories(vslc PRIVATE src "${GEN_DIR}") +target_compile_definitions(vslc PRIVATE "YYSTYPE=node_t *") +# Set general compiler flags, such as getting strdup from posix +target_compile_options(vslc PRIVATE -std=c17 -D_POSIX_C_SOURCE=200809L -Wall -g) + + +# === If Address Sanitizer is enabled, add the compiler and linker flag === + +# Enable ASan by invoking: +# 
cmake -B build -DUSE_ADDRESS_SANITIZER=ON +set (USE_ADDRESS_SANITIZER OFF CACHE BOOL "Should the Address Sanitizer tool be enabled?") +if (USE_ADDRESS_SANITIZER) + target_compile_options(vslc PRIVATE -fsanitize=address) + target_link_options(vslc PRIVATE -fsanitize=address) +endif() diff --git a/ps3/flake.lock b/ps3/flake.lock new file mode 100644 index 0000000..a608aa3 --- /dev/null +++ b/ps3/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1771369470, + "narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "0182a361324364ae3f436a63005877674cf45efb", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/ps3/flake.nix b/ps3/flake.nix new file mode 100644 index 0000000..d761cd1 --- /dev/null +++ b/ps3/flake.nix @@ -0,0 +1,21 @@ +{ + description = "devshell for compilers/ps2"; + + inputs.nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable"; + + outputs = { self, nixpkgs }: + let + system = "x86_64-linux"; + pkgs = import nixpkgs { inherit system; }; + in { + devShells.x86_64-linux.default = pkgs.mkShell { + buildInputs = with pkgs; [ + flex + bison + gdb + cmake + libgcc + ]; + }; + }; +} diff --git a/ps3/src/graphviz_output.c b/ps3/src/graphviz_output.c new file mode 100644 index 0000000..3794b13 --- /dev/null +++ b/ps3/src/graphviz_output.c @@ -0,0 +1,71 @@ +#include "vslc.h" + +// Helper function for escaping special characters when printing GraphViz strings +static void print_escaped_string(char* str) +{ + for (char* c = str; *c != '\0'; c++) + { + switch (*c) + { + case '\\': + printf("\\\\"); + break; + case '"': + printf("\\\""); + break; + case '\n': + printf("\\\\n"); + break; + default: + putchar(*c); + break; + } + } +} + +// A recursive function for printing a 
node as GraphViz, and all its children +static void graphviz_node_print_internal(node_t* node) +{ + printf("node%p [label=\"%s", node, NODE_TYPE_NAMES[node->type]); + switch (node->type) + { + case OPERATOR: + printf("\\n%s", node->data.operator); + break; + case IDENTIFIER: + printf("\\n%s", node->data.identifier); + break; + case NUMBER_LITERAL: + printf("\\n%ld", node->data.number_literal); + break; + case STRING_LITERAL: + printf("\\n"); + print_escaped_string(node->data.string_literal); + break; + case STRING_LIST_REFERENCE: + printf("\\n%zu", node->data.string_list_index); + break; + default: + break; + } + + printf("\"];\n"); + for (size_t i = 0; i < node->n_children; i++) + { + node_t* child = node->children[i]; + if (child == NULL) + printf("node%p -- node%pNULL%zu ;\n", node, node, i); + else + { + printf("node%p -- node%p ;\n", node, child); + graphviz_node_print_internal(child); + } + } +} + +void graphviz_node_print(node_t* root) +{ + printf("graph \"\" {\n node[shape=box];\n"); + graphviz_node_print_internal(root); + printf("}\n"); +} diff --git a/ps3/src/nodetypes.h b/ps3/src/nodetypes.h new file mode 100644 index 0000000..9faf7ac --- /dev/null +++ b/ps3/src/nodetypes.h @@ -0,0 +1,33 @@ +// This is a special file that is not intended to be #include-d normally. +// Instead, it is included by "tree.h" and "tree.c" to provide both an enum of node types, +// and an array of strings containing the node names. 
+
+// clang-format off
+
+#ifndef NODE_TYPE
+#error The file nodetypes.h should only be included after defining the NODE_TYPE macro
+#endif
+
+NODE_TYPE(LIST),
+NODE_TYPE(GLOBAL_VARIABLE_DECLARATION),
+NODE_TYPE(ARRAY_INDEXING),
+NODE_TYPE(VARIABLE),
+NODE_TYPE(FUNCTION),
+NODE_TYPE(BLOCK),
+NODE_TYPE(LOCAL_VARIABLE_DECLARATION),
+NODE_TYPE(LOCAL_VARIABLE),
+NODE_TYPE(ASSIGNMENT_STATEMENT),
+NODE_TYPE(RETURN_STATEMENT),
+NODE_TYPE(PRINT_STATEMENT),
+NODE_TYPE(PRINTLN_STATEMENT),
+NODE_TYPE(IF_STATEMENT),
+NODE_TYPE(WHILE_STATEMENT),
+NODE_TYPE(BREAK_STATEMENT),
+NODE_TYPE(FUNCTION_CALL),
+NODE_TYPE(OPERATOR), // uses the data field "operator"
+NODE_TYPE(IDENTIFIER), // uses and owns the data field "identifier"
+NODE_TYPE(NUMBER_LITERAL), // uses the data field "number_literal"
+NODE_TYPE(STRING_LITERAL), // uses and owns the data field "string_literal"
+NODE_TYPE(STRING_LIST_REFERENCE), // uses the data field "string_list_index"
+
+#undef NODE_TYPE
diff --git a/ps3/src/parser.y b/ps3/src/parser.y
new file mode 100644
index 0000000..abc538d
--- /dev/null
+++ b/ps3/src/parser.y
@@ -0,0 +1,244 @@
+%{
+#include "vslc.h"
+
+// State variables from the flex generated scanner
+extern int yylineno; // The line currently being read
+extern char yytext[]; // The text of the last consumed lexeme
+
+// The main flex driver function used by the parser
+int yylex(void);
+
+// The function called by the parser when errors occur
+int yyerror(const char *error)
+{
+    fprintf(stderr, "%s on line %d\n", error, yylineno);
+    exit(EXIT_FAILURE);
+}
+
+// Feel free to define #define macros if you want to
+%}
+
+%token FUNC VAR RETURN PRINT PRINTLN IF ELSE WHILE BREAK AND OR
+%token NUMBER_TOKEN IDENTIFIER_TOKEN STRING_TOKEN
+
+// Use operator precedence to ensure order of operations is correct
+%left '?' ':' // The ternary ? : operator has the lowest precedence of them all
+%left OR // Or has lower precedence than and, just like in C
+%left AND
+%left '=' '!' // == and !=
+%left '<' '>' // < <= > and >=
+%left '+' '-'
+%left '*' '/'
+%right UNARY_OPERATORS
+
+// Resolve the nested if-if-else ambiguity with precedence
+%nonassoc ')'
+%nonassoc ELSE
+
+%%
+program :
+      global_list { root = $1; }
+    ;
+global_list :
+      global { $$ = node_create(LIST, 1, $1); }
+    | global_list global { $$ = append_to_list_node($1, $2); }
+    ;
+global :
+      function { $$ = $1; }
+    | global_variable_declaration { $$ = $1; }
+    ;
+global_variable_declaration :
+      VAR global_variable_list { $$ = node_create(GLOBAL_VARIABLE_DECLARATION, 1, $2); }
+    ;
+global_variable_list :
+      global_variable { $$ = node_create(LIST, 1, $1); }
+    | global_variable_list ',' global_variable { $$ = append_to_list_node($1, $3); }
+    ;
+global_variable :
+      identifier { $$ = $1; }
+    | array_indexing { $$ = $1; }
+    ;
+array_indexing :
+      identifier '[' expression ']' { $$ = node_create(ARRAY_INDEXING, 2, $1, $3); }
+    ;
+parameter_list :
+      non_empty_parameter_list { $$ = $1; }
+    | { $$ = node_create(LIST, 0); }
+    ;
+// Kept separate from parameter_list so that "(, a)" cannot be built by extending the empty list
+non_empty_parameter_list :
+      identifier { $$ = node_create(LIST, 1, $1); }
+    | non_empty_parameter_list ',' identifier { $$ = append_to_list_node($1, $3); }
+    ;
+function :
+      FUNC identifier '(' parameter_list ')' statement { $$ = node_create(FUNCTION, 3, $2, $4, $6); }
+    ;
+statement :
+      block { $$ = $1; }
+    | assignment_statement { $$ = $1; }
+    | return_statement { $$ = $1; }
+    | print_statement { $$ = $1; }
+    | println_statement { $$ = $1; }
+    | if_statement { $$ = $1; }
+    | while_statement { $$ = $1; }
+    | break_statement { $$ = $1; }
+    | function_call { $$ = $1; }
+    ;
+block :
+      '{' statement_or_declaration_list '}' { $$ = node_create(BLOCK, 1, $2); }
+    ;
+statement_or_declaration_list :
+      statement_or_declaration_list statement_or_declaration { $$ = append_to_list_node($1, $2); }
+    | { $$ = node_create(LIST, 0); }
+    ;
+statement_or_declaration :
+      statement { $$ = $1; }
+    | local_variable_declaration { $$ = $1; }
+    ;
+local_variable_declaration :
+      VAR local_variable_list { $$ = node_create(LOCAL_VARIABLE_DECLARATION, 1, $2); }
+    ;
+local_variable_list :
+      local_variable { $$ = node_create(LIST, 1, $1); }
+    | local_variable_list ',' local_variable { $$ = append_to_list_node($1, $3); }
+    ;
+local_variable :
+      identifier { $$ = node_create(LOCAL_VARIABLE, 1, $1); }
+    | identifier '=' expression { $$ = node_create(LOCAL_VARIABLE, 2, $1, $3); }
+    ;
+assignment_statement :
+      identifier '=' expression { $$ = node_create(ASSIGNMENT_STATEMENT, 2, $1, $3); }
+    | array_indexing '=' expression { $$ = node_create(ASSIGNMENT_STATEMENT, 2, $1, $3); }
+    ;
+return_statement :
+      RETURN expression { $$ = node_create(RETURN_STATEMENT, 1, $2); }
+    ;
+print_statement :
+      PRINT '(' print_list ')' { $$ = node_create(PRINT_STATEMENT, 1, $3); }
+    ;
+println_statement :
+      PRINTLN '(' print_list ')' { $$ = node_create(PRINTLN_STATEMENT, 1, $3); }
+    ;
+print_list :
+      print_item { $$ = node_create(LIST, 1, $1); }
+    | print_list ',' print_item { $$ = append_to_list_node($1, $3); }
+    ;
+print_item :
+      expression { $$ = $1; }
+    | string { $$ = $1; }
+    ;
+break_statement :
+      BREAK { $$ = node_create(BREAK_STATEMENT, 0); }
+    ;
+if_statement :
+      IF '(' expression ')' statement { $$ = node_create(IF_STATEMENT, 2, $3, $5); }
+    | IF '(' expression ')' statement ELSE statement { $$ = node_create(IF_STATEMENT, 3, $3, $5, $7); }
+    ;
+while_statement :
+      WHILE '(' expression ')' statement { $$ = node_create(WHILE_STATEMENT, 2, $3, $5); }
+    ;
+expression :
+      expression '?' expression ':' expression {
+        $$ = node_create(OPERATOR, 3, $1, $3, $5);
+        $$->data.operator = "?:";
+      }
+    | expression OR expression {
+        $$ = node_create(OPERATOR, 2, $1, $3);
+        $$->data.operator = "or";
+      }
+    | expression AND expression {
+        $$ = node_create(OPERATOR, 2, $1, $3);
+        $$->data.operator = "and";
+      }
+    | expression '=' '=' expression {
+        $$ = node_create(OPERATOR, 2, $1, $4);
+        $$->data.operator = "==";
+      }
+    | expression '!' '=' expression {
+        $$ = node_create(OPERATOR, 2, $1, $4);
+        $$->data.operator = "!=";
+      }
+    | expression '<' expression {
+        $$ = node_create(OPERATOR, 2, $1, $3);
+        $$->data.operator = "<";
+      }
+    | expression '<' '=' expression {
+        $$ = node_create(OPERATOR, 2, $1, $4);
+        $$->data.operator = "<=";
+      } %prec '<' // the rule ends in '=', which would otherwise give it the precedence of == and !=
+    | expression '>' expression {
+        $$ = node_create(OPERATOR, 2, $1, $3);
+        $$->data.operator = ">";
+      }
+    | expression '>' '=' expression {
+        $$ = node_create(OPERATOR, 2, $1, $4);
+        $$->data.operator = ">=";
+      } %prec '>' // same reason as for <= above
+    | expression '+' expression {
+        $$ = node_create(OPERATOR, 2, $1, $3);
+        $$->data.operator = "+";
+      }
+    | expression '-' expression {
+        $$ = node_create(OPERATOR, 2, $1, $3);
+        $$->data.operator = "-";
+      }
+    | expression '*' expression {
+        $$ = node_create(OPERATOR, 2, $1, $3);
+        $$->data.operator = "*";
+      }
+    | expression '/' expression {
+        $$ = node_create(OPERATOR, 2, $1, $3);
+        $$->data.operator = "/";
+      }
+    | '-' expression {
+        $$ = node_create(OPERATOR, 1, $2);
+        $$->data.operator = "-";
+      } %prec UNARY_OPERATORS
+    | '!' 
expression {
+        $$ = node_create(OPERATOR, 1, $2);
+        $$->data.operator = "!";
+      } %prec UNARY_OPERATORS
+    | '(' expression ')' { $$ = $2; }
+    | number { $$ = $1; }
+    | identifier { $$ = $1; }
+    | array_indexing { $$ = $1; }
+    | function_call { $$ = $1; }
+    ;
+function_call :
+      identifier '(' argument_list ')' { $$ = node_create(FUNCTION_CALL, 2, $1, $3); }
+    ;
+argument_list :
+      expression_list { $$ = $1; }
+    | { $$ = node_create(LIST, 0); }
+    ;
+expression_list :
+      expression { $$ = node_create(LIST, 1, $1); }
+    | expression_list ',' expression { $$ = append_to_list_node($1, $3); }
+    ;
+identifier :
+      IDENTIFIER_TOKEN
+      {
+        // Create a node with 0 children to represent the identifier
+        $$ = node_create(IDENTIFIER, 0);
+        // Allocate a copy of yytext to keep in the syntax tree as data
+        $$->data.identifier = strdup(yytext);
+      }
+    ;
+number :
+      NUMBER_TOKEN {
+        $$ = node_create(NUMBER_LITERAL, 0);
+        char *t;
+        $$->data.number_literal = strtoll(yytext, &t, 10); // long long, since long can be narrower than int64_t
+        if (yytext == t) {
+          yyerror("failed to parse number literal"); // reports the line number and exits
+        }
+      }
+    ;
+string :
+      STRING_TOKEN {
+        $$ = node_create(STRING_LITERAL, 0);
+        $$->data.string_literal = strdup(yytext);
+      }
+    ;
+%%
diff --git a/ps3/src/scanner.l b/ps3/src/scanner.l
new file mode 100644
index 0000000..dad6d4b
--- /dev/null
+++ b/ps3/src/scanner.l
@@ -0,0 +1,39 @@
+%{
+#include "vslc.h"
+
+// The tokens defined in parser.y
+#include "parser.h"
+
+// parser.h contains some unused functions, ignore that
+#pragma GCC diagnostic ignored "-Wunused-function"
+%}
+
+%option noyywrap
+%option array
+%option yylineno
+
+WHITESPACE [ \v\t\n\r]
+COMMENT \/\/[^\n]*
+QUOTED \"([^\"\n]|\\\")*\"
+NUMERIC [0-9]+
+BINDING [a-zA-Z][a-zA-Z_0-9]*
+
+%%
+{WHITESPACE}+ { /* Eliminate whitespace */ }
+{COMMENT} { /* Eliminate comments, including an empty "//" */ }
+{QUOTED} { return STRING_TOKEN; }
+func { return FUNC; }
+var { return VAR; }
+return { return RETURN; }
+print { return PRINT; }
+println { return PRINTLN; }
+if { return IF; }
+else { return 
ELSE; } +while { return WHILE; } +break { return BREAK; } +and { return AND; } +or { return OR; } +{NUMERIC} { return NUMBER_TOKEN; } +{BINDING} { return IDENTIFIER_TOKEN; } +. { return yytext[0]; } +%% diff --git a/ps3/src/tree.c b/ps3/src/tree.c new file mode 100644 index 0000000..efdd004 --- /dev/null +++ b/ps3/src/tree.c @@ -0,0 +1,217 @@ +#include "vslc.h" + +// Global root for abstract syntax tree +node_t* root; + +// Declarations of helper functions defined further down in this file +static void node_print(node_t* node, int nesting); +static node_t* simplify_subtree(node_t* node); +static void node_finalize(node_t* discard); +static void destroy_subtree(node_t* discard); + +// Initialize a node with the given type and children +node_t* node_create(node_type_t type, size_t n_children, ...) +{ + node_t* result = malloc(sizeof(node_t)); + + // Initialize every field in the struct + *result = (node_t){ + .type = type, + .n_children = n_children, + .children = malloc(n_children * sizeof(node_t*)), + }; + + // Read each child node from the va_list + va_list child_list; + va_start(child_list, n_children); + for (size_t i = 0; i < n_children; i++) + { + result->children[i] = va_arg(child_list, node_t*); + } + va_end(child_list); + + return result; +} + +// Append an element to the given LIST node, returns the list node +node_t* append_to_list_node(node_t* list_node, node_t* element) +{ + assert(list_node->type == LIST); + + // Calculate the minimum size of the new allocation + size_t min_allocation_size = list_node->n_children + 1; + + // Round up to the next power of two + size_t new_allocation_size = 1; + while (new_allocation_size < min_allocation_size) + new_allocation_size *= 2; + + // Resize the allocation + list_node->children = realloc(list_node->children, new_allocation_size * sizeof(node_t*)); + + // Insert the new element and increase child count by 1 + list_node->children[list_node->n_children] = element; + list_node->n_children++; + + return list_node; 
+} + +// Outputs the entire syntax tree to the terminal +void print_syntax_tree(void) +{ + // If the environment variable GRAPHVIZ_OUTPUT is set, print a GraphViz graph in the dot format + if (getenv("GRAPHVIZ_OUTPUT") != NULL) + graphviz_node_print(root); + else + node_print(root, 0); +} + +// Performs constant folding and replaces nodes with simpler nodes +void simplify_syntax_tree(void) +{ + root = simplify_subtree(root); +} + +// Frees all memory held by the syntax tree +void destroy_syntax_tree(void) +{ + destroy_subtree(root); + root = NULL; +} + +// The rest of this file contains private helper functions used by the above functions + +// Prints out the given node and all its children recursively +static void node_print(node_t* node, int nesting) +{ + // Indent the line based on how deep the node is in the syntax tree + printf("%*s", nesting, ""); + + if (node == NULL) + { + printf("(NULL)\n"); + return; + } + + printf("%s", NODE_TYPE_NAMES[node->type]); + + // For nodes with extra data, include it in the printout + switch (node->type) + { + case OPERATOR: + printf(" (%s)", node->data.operator); + break; + case IDENTIFIER: + printf(" (%s)", node->data.identifier); + break; + case NUMBER_LITERAL: + printf(" (%ld)", node->data.number_literal); + break; + case STRING_LITERAL: + printf(" (%s)", node->data.string_literal); + break; + case STRING_LIST_REFERENCE: + printf(" (%zu)", node->data.string_list_index); + break; + default: + break; + } + + putchar('\n'); + + // Recursively print children, with some more indentation + for (size_t i = 0; i < node->n_children; i++) + node_print(node->children[i], nesting + 1); +} + +// If the given OPERATOR node is "and" or "or", converts it to a ternary ?: operator like so: +// a and b ===> a ? b : 0 +// a or b ===> a ? 
1 : b
+static node_t* convert_operator(node_t* node)
+{
+    assert(node->type == OPERATOR);
+    // TODO: Task 2: Implement this function (the stub returns the node unchanged)
+    return node;
+}
+
+// Constant folds the given OPERATOR node, if all its children are NUMBER_LITERAL
+static node_t* constant_fold_operator(node_t* node)
+{
+    assert(node->type == OPERATOR);
+    // TODO: Task 3: Implement this function (the stub returns the node unchanged)
+    return node;
+}
+
+// Convert PRINTLN_STATEMENT into PRINT_STATEMENT by appending an extra "\n"
+static node_t* simplify_println_statement(node_t* node)
+{
+    assert(node->type == PRINTLN_STATEMENT);
+    // TODO: Task 4: Implement this function (the stub returns the node unchanged)
+    return node;
+}
+
+// Flatten all LOCAL_VARIABLE_DECLARATION nodes in the block, by converting them
+// into individual LOCAL_VARIABLE nodes, and splitting all variable initializations
+// into separate ASSIGNMENT_STATEMENT nodes.
+static node_t* flatten_variable_declarations(node_t* block)
+{
+    assert(block->type == BLOCK);
+    // TODO: Task 5: Implement this function (the stub returns the block unchanged)
+    return block;
+}
+
+// Recursively performs simplification of the syntax tree:
+// - replacing "and" and "or" operators with the ternary ?: operator
+// - constant folding operators where all operands are NUMBER_LITERAL
+// - replacing all PRINTLN_STATEMENT with PRINT_STATEMENT with an extra "\n"
+// - flattening LOCAL_VARIABLE_DECLARATION nodes into LOCAL_VARIABLE and ASSIGNMENT_STATEMENT
+//
+// Returns the root of the new subtree.
+// Any node that is detached from the tree by this operation must be freed, to avoid memory leaks.
+static node_t* simplify_subtree(node_t* node) +{ + if (node == NULL) + return node; + + // TODO: Task 1: Implement this function by calling the above functions when applicable + return node; +} + +// Frees the memory owned by the given node, but does not touch its children +static void node_finalize(node_t* discard) +{ + if (discard == NULL) + return; + + // Only free data if the data field is owned by the node + switch (discard->type) + { + case IDENTIFIER: + free(discard->data.identifier); + break; + case STRING_LITERAL: + free(discard->data.string_literal); + break; + default: + break; + } + free(discard->children); + free(discard); +} + +// Recursively frees the memory owned by the given node, and all its children +static void destroy_subtree(node_t* discard) +{ + if (discard == NULL) + return; + + for (size_t i = 0; i < discard->n_children; i++) + destroy_subtree(discard->children[i]); + node_finalize(discard); +} + +// Definition of the global string array NODE_TYPE_NAMES +const char* NODE_TYPE_NAMES[NODE_TYPE_COUNT] = { +#define NODE_TYPE(node_type) #node_type +#include "nodetypes.h" +}; diff --git a/ps3/src/tree.h b/ps3/src/tree.h new file mode 100644 index 0000000..23420df --- /dev/null +++ b/ps3/src/tree.h @@ -0,0 +1,60 @@ +#ifndef TREE_H +#define TREE_H + +#include +#include + +// Create the node_type_t enum containing all node types defined in nodetypes.h +typedef enum +{ + +#define NODE_TYPE(node_type) node_type +#include "nodetypes.h" + NODE_TYPE_COUNT +} node_type_t; + +// Array containing human-readable names for all node types +extern const char* NODE_TYPE_NAMES[NODE_TYPE_COUNT]; + +// This is the tree node structure for the abstract syntax tree +typedef struct node +{ + node_type_t type; + struct node** children; // An owned list of pointers to child nodes + size_t n_children; // The length of the list of child nodes + + // At most one of the data fields can be used at once. 
+ // The node's type decides which field is active, if any + union + { + const char* operator; // pointer to constant string, such as "+". Not owned + char* identifier; // owned heap allocation. The identifier as a string + int64_t number_literal; // the literal integer value + char* string_literal; // owned heap allocation. Includes the surrounding "quotation marks" + size_t string_list_index; // position in global string list + } data; +} node_t; + +// Global root for parse tree and abstract syntax tree +extern node_t* root; + +// The node creation function, used by the parser +node_t* node_create(node_type_t type, size_t n_children, ...); + +// Append an element to the given LIST node, returns the list node +node_t* append_to_list_node(node_t* list_node, node_t* element); + +// Outputs the entire syntax tree to the terminal +void print_syntax_tree(void); + +// Performs constant folding and node replacements +void simplify_syntax_tree(void); + +// Cleans up the entire syntax tree +void destroy_syntax_tree(void); + +// Special function used when syntax trees are output as graphviz graphs. +// Implemented in graphviz_output.c +void graphviz_node_print(node_t* root); + +#endif // TREE_H diff --git a/ps3/src/vslc.c b/ps3/src/vslc.c new file mode 100644 index 0000000..a325174 --- /dev/null +++ b/ps3/src/vslc.c @@ -0,0 +1,65 @@ +#include "vslc.h" + +#include + +static bool print_full_tree = false; +static bool print_simplified_tree = false; + +static const char* usage = "Compiler for VSL. The input program is read from stdin." + "\n" + "Options:\n" + "\t -h \t Output this text and exit\n" + "\t -t \t Output the abstract syntax tree\n" + "\t -T \t Output the abstract syntax tree after constant folding\n"; + + +// Command line option parsing +static void options(int argc, char** argv) +{ + if (argc == 1) + { + fprintf(stderr, "%s: expected at last one option. 
See -h for help\n", argv[0]); + exit(EXIT_FAILURE); + } + + while (true) + { + switch (getopt(argc, argv, "htT")) + { + default: // Unrecognized option + fprintf(stderr, "%s: See -h for help\n", argv[0]); + exit(EXIT_FAILURE); + case 'h': + printf("%s:\n%s", argv[0], usage); + exit(EXIT_SUCCESS); + case 't': + print_full_tree = true; + break; + case 'T': + print_simplified_tree = true; + break; + case -1: + return; // Done parsing options + } + } +} + +// Entry point +int main(int argc, char** argv) +{ + options(argc, argv); + + yyparse(); // Generated from grammar/bison, constructs syntax tree + yylex_destroy(); // Free buffers used by flex + + // Operations in tree.c + if (print_full_tree) + print_syntax_tree(); + + simplify_syntax_tree(); + + if (print_simplified_tree) + print_syntax_tree(); + + destroy_syntax_tree(); // In tree.c +} diff --git a/ps3/src/vslc.h b/ps3/src/vslc.h new file mode 100644 index 0000000..7984141 --- /dev/null +++ b/ps3/src/vslc.h @@ -0,0 +1,20 @@ +#ifndef VSLC_H +#define VSLC_H +#include +#include +#include +#include +#include +#include +#include + +// Definition of the tree node type, and functions for handling the parse tree +#include "tree.h" + +// The main driver function of the parser generated by bison +int yyparse(); + +// A "hidden" cleanup function in flex +int yylex_destroy(); + +#endif // VSLC_H diff --git a/ps3/vsl_programs/ps2-parser/arrays.vsl b/ps3/vsl_programs/ps2-parser/arrays.vsl new file mode 100644 index 0000000..52ca0f9 --- /dev/null +++ b/ps3/vsl_programs/ps2-parser/arrays.vsl @@ -0,0 +1,14 @@ + +var array[3], other[20] + +func sum() { + return array[0] + array[1] + array[2] +} + +func main() { + array[0] = 5 + array[1] = 1 + array[2] = array[1] + other[sum()] = sum() + println("Should be 7:", other[7]) +} diff --git a/ps3/vsl_programs/ps2-parser/if.vsl b/ps3/vsl_programs/ps2-parser/if.vsl new file mode 100644 index 0000000..32b27a4 --- /dev/null +++ b/ps3/vsl_programs/ps2-parser/if.vsl @@ -0,0 +1,27 @@ + +func 
main(a, b) { + if (1) + println("Always") + + if (0) + println("Never") + else + println("This, however!") + + if (a > b) { + println(a, ">", b) + } + else if (a < b) { + println(a, "<", b) + } + else { + println(a, "=", b) + } + + // Now test dangling else + if (a) + if (b) + println("a & b") + else + println("a, but not b") +} diff --git a/ps3/vsl_programs/ps2-parser/operators.vsl b/ps3/vsl_programs/ps2-parser/operators.vsl new file mode 100644 index 0000000..89b3b24 --- /dev/null +++ b/ps3/vsl_programs/ps2-parser/operators.vsl @@ -0,0 +1,34 @@ + +func main() { + var a = 1 + var b = 4 - 1 - 1 + var c = -a * b + 4 * a * 1 + 2 + + a = a / 2 + 1 + + if (a * a < b + c) + println("Yes") + + if (a == 1 or b == 2) + println("Either!") + + if (a == 1 and b == 2) + println("Both!") + + if (!a == 0) + println("Yup") + + if (a <= b == b > a) + print("Alawys!") + + if (a <= b != a >= b) + println("a != b") + + if (a == b == (a != b)) + println("Never!") + + a = a > 4 or b < 2 ? a + 4 : b - 2 + + c = (a <= b) + --(a >= b) + return c +} diff --git a/ps3/vsl_programs/ps2-parser/simple-assignment.vsl b/ps3/vsl_programs/ps2-parser/simple-assignment.vsl new file mode 100644 index 0000000..84ae100 --- /dev/null +++ b/ps3/vsl_programs/ps2-parser/simple-assignment.vsl @@ -0,0 +1,12 @@ + +func main() { + var localVariable + + globalVariable = 10 + localVariable = 3 + + println("Global:", globalVariable, " local:", localVariable) +} + +// Global variables can be declared anywhere in the file +var globalVariable diff --git a/ps3/vsl_programs/ps2-parser/simple-functions.vsl b/ps3/vsl_programs/ps2-parser/simple-functions.vsl new file mode 100644 index 0000000..b5864c9 --- /dev/null +++ b/ps3/vsl_programs/ps2-parser/simple-functions.vsl @@ -0,0 +1,16 @@ + +func main() { + print("Should be 7:", identity(first(second(5, 7), first(2, 9)))) +} + +func identity(argument) { + return argument +} + +func first(a, b) { + return a +} + +func second(a, b) { + return b +} diff --git 
a/ps3/vsl_programs/ps2-parser/simple-hello.vsl b/ps3/vsl_programs/ps2-parser/simple-hello.vsl new file mode 100644 index 0000000..387fc2c --- /dev/null +++ b/ps3/vsl_programs/ps2-parser/simple-hello.vsl @@ -0,0 +1,5 @@ + +func main() { + print("Hello ") + println("World!") +} diff --git a/ps3/vsl_programs/ps2-parser/variables.vsl b/ps3/vsl_programs/ps2-parser/variables.vsl new file mode 100644 index 0000000..01a2ead --- /dev/null +++ b/ps3/vsl_programs/ps2-parser/variables.vsl @@ -0,0 +1,30 @@ + +var global, myArray[10] + +func main() { + var a, b = 2 + a = 5 + var c + global = 3 + + // A block is itself a statement + { + var d = a + b * c + println("d:", d) + } + + if (1) { + var x + x = a*b*c + + if (x > 0) { + // Declare a new x, shadowing the outer x + var x + x = a + global + } else { + x = a + myArray[global] = 2 + } + println("x:", x) + } +} diff --git a/ps3/vsl_programs/ps2-parser/while.vsl b/ps3/vsl_programs/ps2-parser/while.vsl new file mode 100644 index 0000000..50a7e9d --- /dev/null +++ b/ps3/vsl_programs/ps2-parser/while.vsl @@ -0,0 +1,20 @@ + +func callMe(i) { + println("i is now", i) +} + +func main(start, end) { + var counter = start + while (counter < end) { + callMe(counter) + counter = counter + 1 + } + + // Go down again using while 1 + break + while (1) { + counter = counter - 1 + if (counter < start) + break + callMe(counter) + } +} diff --git a/ps3/vsl_programs/ps3-simplify/and-or-convert.vsl b/ps3/vsl_programs/ps3-simplify/and-or-convert.vsl new file mode 100644 index 0000000..f164e9d --- /dev/null +++ b/ps3/vsl_programs/ps3-simplify/and-or-convert.vsl @@ -0,0 +1,14 @@ +func other() { + return 1 +} + +func main(a) { + if (a and other()) + print("Hei") + + if (a or other()) + print("Yo") + + if (a > 2 or other() - 1 and a + 1 < 10) + print("Nope") +} diff --git a/ps3/vsl_programs/ps3-simplify/constant-fold.vsl b/ps3/vsl_programs/ps3-simplify/constant-fold.vsl new file mode 100644 index 0000000..5921f05 --- /dev/null +++ 
b/ps3/vsl_programs/ps3-simplify/constant-fold.vsl @@ -0,0 +1,26 @@ +func main() { + print(3 + 5) + print(3 - -6) + print(20 * 4) + print(111 / 11) + + print(50 and 10) + print(0 or 20) + print(0 and 1) + print(1 ? 5 : 7) + print(0 ? 20 : 30) + + return other() +} + +func other() { + var x + x = !5 == !6 + x = !(5 != 6) + x = 10 < 10 + x = 10 <= 10 + x = 10 > 10 + x = 10 >= 10 + x = 40 + 20 + x * 2 + return x +} diff --git a/ps3/vsl_programs/ps3-simplify/println-convert.vsl b/ps3/vsl_programs/ps3-simplify/println-convert.vsl new file mode 100644 index 0000000..bd65e9d --- /dev/null +++ b/ps3/vsl_programs/ps3-simplify/println-convert.vsl @@ -0,0 +1,6 @@ +func main() { + print(20, " and ", 40) + println("!") + + println(50, " and ", 50) +} diff --git a/ps3/vsl_programs/ps3-simplify/var-convert.vsl b/ps3/vsl_programs/ps3-simplify/var-convert.vsl new file mode 100644 index 0000000..7d29d26 --- /dev/null +++ b/ps3/vsl_programs/ps3-simplify/var-convert.vsl @@ -0,0 +1,10 @@ +func main() { + var a, b = 2, c = b + + print(b) + + var c + c = 10 + + print(c) +}