ps2: init

This commit is contained in:
2026-02-14 10:45:22 +01:00
parent 04fc81db96
commit cebd716ff8
18 changed files with 721 additions and 0 deletions

14
ps2/.gitignore vendored Normal file
View File

@@ -0,0 +1,14 @@
# OS and editor artifacts
.DS_Store
.idea
# Logs, scratch directories and tool caches
*.log
tmp/
.cache/
# Build directory
build/
# Ignored by extension. NOTE(review): presumably files generated while running
# the compiler and its tooling on test programs - confirm against the test scripts.
*.ast
*.svg
*.symbols
*.S
*.out
# Exception: files directly inside any vsl_programs/*/suggested/ directory stay
# tracked, even when they match one of the patterns above.
!vsl_programs/*/suggested/*

50
ps2/CMakeLists.txt Normal file
View File

@@ -0,0 +1,50 @@
# Build script for vslc, the VSL compiler.
# The scanner and parser are generated by flex and bison into the build
# directory and compiled together with the hand-written sources in src/.
cmake_minimum_required(VERSION 3.21)
project(vslc VERSION 1.0 LANGUAGES C)

# Hand-written C sources, relative to this directory
set(VSLC_SOURCES
  "src/vslc.c"
  "src/tree.c"
  "src/graphviz_output.c")
set(VSLC_LEXER_SOURCE "src/scanner.l")
set(VSLC_PARSER_SOURCE "src/parser.y")

# === Setup generation of parser and scanner .c files and support headers
find_package(FLEX 2.6 REQUIRED)
find_package(BISON 3.5 REQUIRED)

# It is highly recommended to have bison v. 3.8 or later
# This version added the very useful counterexample-feature
if(BISON_VERSION VERSION_GREATER_EQUAL 3.8)
  set(BISON_FLAGS "-Wcounterexamples")
endif()

# Generated files go in the binary dir, never in the source tree
set(GEN_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(SCANNER_GEN_C "${GEN_DIR}/scanner.c")
set(PARSER_GEN_C "${GEN_DIR}/parser.c")

flex_target(scanner "${VSLC_LEXER_SOURCE}" "${SCANNER_GEN_C}"
  DEFINES_FILE "${GEN_DIR}/scanner.h")
# COMPILE_FLAGS takes a single string; quoting keeps BISON_FLAGS as one
# argument even if more flags are added to it later.
bison_target(parser "${VSLC_PARSER_SOURCE}" "${PARSER_GEN_C}"
  DEFINES_FILE "${GEN_DIR}/parser.h"
  COMPILE_FLAGS "${BISON_FLAGS}")
# The scanner includes parser.h, so it must be generated after the parser
add_flex_bison_dependency(scanner parser)

# === Finally declare the compiler target, depending on all .c files in the project ===
add_executable(vslc
  ${VSLC_SOURCES}
  "${SCANNER_GEN_C}"
  "${PARSER_GEN_C}")

# Set some flags specifically for flex/bison
target_include_directories(vslc PRIVATE src "${GEN_DIR}")
target_compile_definitions(vslc PRIVATE
  "YYSTYPE=node_t *"
  # Needed for getting strdup from posix when compiling as strict ISO C
  _POSIX_C_SOURCE=200809L)

# Request strict C17 through target properties instead of a raw -std= flag,
# so CMake picks the right spelling for the compiler in use.
set_target_properties(vslc PROPERTIES
  C_STANDARD 17
  C_STANDARD_REQUIRED ON
  C_EXTENSIONS OFF)

# Set general compiler flags
target_compile_options(vslc PRIVATE -Wall -g)

# === If Address Sanitizer is enabled, add the compiler and linker flag ===
# Enable ASan by invoking:
# cmake -B build -DUSE_ADDRESS_SANITIZER=ON
option(USE_ADDRESS_SANITIZER "Should the Address Sanitizer tool be enabled?" OFF)
if(USE_ADDRESS_SANITIZER)
  target_compile_options(vslc PRIVATE -fsanitize=address)
  target_link_options(vslc PRIVATE -fsanitize=address)
endif()

71
ps2/src/graphviz_output.c Normal file
View File

@@ -0,0 +1,71 @@
#include "vslc.h"
// Writes str to stdout, escaping the characters that are special inside a
// double-quoted GraphViz string:
//   backslash    -> two backslashes
//   double quote -> backslash followed by the quote
//   newline      -> two backslashes followed by the letter n
// Every other character is written unchanged.
static void print_escaped_string(char* str)
{
    for (size_t i = 0; str[i] != '\0'; i++)
    {
        const char ch = str[i];

        if (ch == '\\')
            fputs("\\\\", stdout);
        else if (ch == '"')
            fputs("\\\"", stdout);
        else if (ch == '\n')
            fputs("\\\\n", stdout);
        else
            putchar(ch);
    }
}
// A recursive function for printing a node as GraphViz, and all its children.
//
// Each node is emitted as "node<address>" with a label made of its type name
// and, for node types that carry data, that data on a second label line.
// Afterwards one edge per child is emitted and the child is printed in turn.
// A NULL child gets an edge to a placeholder named "node<address>NULL<index>".
//
// node must not be NULL.
static void graphviz_node_print_internal(node_t* node)
{
    // %p is only defined for void*, so node pointers are cast explicitly
    printf("node%p [label=\"%s", (void*)node, NODE_TYPE_NAMES[node->type]);

    switch (node->type)
    {
    case OPERATOR:
        printf("\\n%s", node->data.operator);
        break;
    case IDENTIFIER:
        printf("\\n%s", node->data.identifier);
        break;
    case NUMBER_LITERAL:
        // int64_t is not `long` on every platform; widen to long long to match %lld
        printf("\\n%lld", (long long)node->data.number_literal);
        break;
    case STRING_LITERAL:
        printf("\\n");
        print_escaped_string(node->data.string_literal);
        break;
    case STRING_LIST_REFERENCE:
        printf("\\n%zu", node->data.string_list_index);
        break;
    default:
        break;
    }

    printf("\"];\n");

    for (size_t i = 0; i < node->n_children; i++)
    {
        node_t* child = node->children[i];
        if (child == NULL)
        {
            printf("node%p -- node%pNULL%zu ;\n", (void*)node, (void*)node, i);
        }
        else
        {
            printf("node%p -- node%p ;\n", (void*)node, (void*)child);
            graphviz_node_print_internal(child);
        }
    }
}
// Prints the tree rooted at root to stdout as an undirected GraphViz graph in
// the dot format, with every node drawn as a box.
// A NULL root produces an empty graph instead of dereferencing NULL.
void graphviz_node_print(node_t* root)
{
    printf("graph \"\" {\n node[shape=box];\n");
    if (root != NULL)
        graphviz_node_print_internal(root);
    printf("}\n");
}

33
ps2/src/nodetypes.h Normal file
View File

@@ -0,0 +1,33 @@
// This is a special file that is not intended to be #include-d normally.
// Instead, it is included by "tree.h" and "tree.c" to provide both an enum of node types,
// and an array of strings containing the node names.
// The includer defines NODE_TYPE(name) before the #include; every line below then
// expands through that macro, and the macro is undefined again at the end of this file.
// clang-format off
#ifndef NODE_TYPE
#error The file nodetypes.h should only be included after defining the NODE_TYPE macro
#endif
NODE_TYPE(LIST),
NODE_TYPE(GLOBAL_VARIABLE_DECLARATION),
NODE_TYPE(ARRAY_INDEXING),
NODE_TYPE(VARIABLE),
NODE_TYPE(FUNCTION),
NODE_TYPE(BLOCK),
NODE_TYPE(LOCAL_VARIABLE_DECLARATION),
NODE_TYPE(LOCAL_VARIABLE),
NODE_TYPE(ASSIGNMENT_STATEMENT),
NODE_TYPE(RETURN_STATEMENT),
NODE_TYPE(PRINT_STATEMENT),
NODE_TYPE(PRINTLN_STATEMENT),
NODE_TYPE(IF_STATEMENT),
NODE_TYPE(WHILE_STATEMENT),
NODE_TYPE(BREAK_STATEMENT),
NODE_TYPE(FUNCTION_CALL),
NODE_TYPE(OPERATOR), // uses the data field "operator"
NODE_TYPE(IDENTIFIER), // uses and owns the data field "identifier"
NODE_TYPE(NUMBER_LITERAL), // uses the data field "number_literal"
NODE_TYPE(STRING_LITERAL), // uses and owns the data field "string_literal"
NODE_TYPE(STRING_LIST_REFERENCE), // uses the data field "string_list_index"
// Undefine the macro so the next includer has to define its own expansion
#undef NODE_TYPE

78
ps2/src/parser.y Normal file
View File

@@ -0,0 +1,78 @@
%{
#include "vslc.h"
// State variables from the flex generated scanner
extern int yylineno; // The line currently being read
// The text of the last consumed lexeme.
// Declared as an array, which matches "%option array" in scanner.l
extern char yytext[];
// The main flex driver function used by the parser
int yylex(void);
// The function called by the parser when errors occur.
// Prints the error message together with the current line number to stderr,
// then terminates the program with EXIT_FAILURE, so it never returns to the parser.
int yyerror(const char *error)
{
fprintf(stderr, "%s on line %d\n", error, yylineno);
exit(EXIT_FAILURE);
}
// Feel free to define #define macros if you want to
%}
// Keyword tokens, followed by the tokens that carry a lexeme.
// All other input characters reach the parser as single-character tokens,
// see the catch-all rule at the bottom of scanner.l
%token FUNC VAR RETURN PRINT PRINTLN IF ELSE WHILE BREAK AND OR
%token NUMBER_TOKEN IDENTIFIER_TOKEN STRING_TOKEN
// Use operator precedence to ensure order of operations is correct
// Precedence increases downwards: each line binds tighter than the lines above it
%left '?' ':' // The ternary ? : operator has the lowest precedence of them all
%left OR // Or has lower precedence than and, just like in C
%left AND
%left '=' '!' // == and !=
%left '<' '>' // < <= > and >=
%left '+' '-'
%left '*' '/'
// UNARY_OPERATORS is a precedence-only symbol; no rule in this file uses it yet.
// NOTE(review): presumably meant for %prec on the unary operator rules - confirm
%right UNARY_OPERATORS
// Resolve the nested if-if-else ambiguity with precedence
%nonassoc ')'
%nonassoc ELSE
%%
// Start symbol: stores the finished list of globals in the global variable root
program :
global_list { root = $1; }
;
// One or more globals, collected left-recursively into a single LIST node
global_list :
global { $$ = node_create(LIST, 1, $1); }
| global_list global { $$ = append_to_list_node($1, $2); }
;
// Currently a global can only be a global variable declaration
global :
global_variable_declaration { $$ = $1; }
;
// "var" followed by a list of variables; the list becomes the node's only child
global_variable_declaration :
VAR global_variable_list { $$ = node_create(GLOBAL_VARIABLE_DECLARATION, 1, $2); }
;
// Comma-separated global variables, collected into a LIST node
global_variable_list :
global_variable { $$ = node_create(LIST, 1, $1); }
| global_variable_list ',' global_variable { $$ = append_to_list_node($1, $3); }
;
// Currently a global variable is just an identifier
global_variable :
identifier { $$ = $1; }
;
// Leaf node holding a heap-allocated copy of the identifier's text
identifier :
IDENTIFIER_TOKEN
{
// Create a node with 0 children to represent the identifier
$$ = node_create(IDENTIFIER, 0);
// Allocate a copy of yytext to keep in the syntax tree as data
$$->data.identifier = strdup(yytext);
}
/*
* This file can currently only recognize global variable declarations, i.e,
*
* var myVar, anotherVar, third
* var theLastOne
*
* TODO:
* Include the remaining modified VSL grammar as specified in the task description.
* This should be a pretty long file when you are done.
*/
%%

43
ps2/src/scanner.l Normal file
View File

@@ -0,0 +1,43 @@
%{
#include "vslc.h"

// The tokens defined in parser.y
#include "parser.h"

// parser.h contains some unused functions, ignore that
#pragma GCC diagnostic ignored "-Wunused-function"
%}

%option noyywrap
%option array
%option yylineno

WHITESPACE [ \t\v\r\n]
/* A comment runs from two slashes to the end of the line. The text after the
 * slashes may be empty, so a line holding nothing but the two slashes is
 * still consumed as a comment instead of leaking out as two '/' tokens. */
COMMENT \/\/[^\n]*
/* A double-quoted string on a single line, where \" does not end the string */
QUOTED \"([^\"\n]|\\\")*\"

%%
{WHITESPACE}+ { /* Eliminate whitespace */ }
{COMMENT} { /* Eliminate comments */ }
{QUOTED} { return STRING_TOKEN; }

  /*
   * TODO:
   *
   * Add the rest of the translation rules here.
   * See the lexical structure definition of the modified VSL in PS2.
   * Also see the `%token` directives in parser.y for all symbolic names that can be returned - e.g. FUNC, IF, IDENTIFIER_TOKEN.
   *
   * Hint to get you started:
   * The WHITESPACE regex defined above is not quite finished. Finish it.
   * The scanner returns STRING_TOKEN when matching the QUOTED regex above.
   * When should the scanner return a NUMBER_TOKEN, IDENTIFIER_TOKEN, etc?
   * In which specific scenarios should the scanner return keyword tokens like FUNC or PRINT?
   *
   * For operators, which are all a single char or two chars, we let each char be a separate token.
   * This is achieved by using the "catch-all" rule at the very bottom of this file.
   */

  /* All other chars get returned as single char tokens */
. { return yytext[0]; }
%%

143
ps2/src/tree.c Normal file
View File

@@ -0,0 +1,143 @@
#include "vslc.h"
// Global root for abstract syntax tree
node_t* root;
// Declarations of helper functions defined further down in this file
static void node_print(node_t* node, int nesting);
static void destroy_subtree(node_t* discard);
// Initialize a node with the given type and children.
//
// type:       the kind of node to create
// n_children: the number of node_t* arguments that follow
// ...:        exactly n_children arguments of type node_t* (NULL is allowed),
//             stored as the node's children in the order given
//
// Returns a heap-allocated node that owns its children array. The data union
// is zeroed; callers that need it fill in the active field afterwards.
// Prints an error and exits with EXIT_FAILURE if memory cannot be allocated.
node_t* node_create(node_type_t type, size_t n_children, ...)
{
    // calloc zeroes the whole node, including the data union
    node_t* node = calloc(1, sizeof(node_t));
    if (node == NULL)
    {
        fprintf(stderr, "node_create: out of memory\n");
        exit(EXIT_FAILURE);
    }

    node->type = type;
    node->n_children = n_children;
    // Nodes without children keep a NULL array; append_to_list_node can still
    // grow it later, since realloc accepts a NULL pointer
    node->children = NULL;

    if (n_children > 0)
    {
        node->children = malloc(n_children * sizeof(node_t*));
        if (node->children == NULL)
        {
            fprintf(stderr, "node_create: out of memory\n");
            exit(EXIT_FAILURE);
        }

        va_list child_args;
        va_start(child_args, n_children);
        for (size_t i = 0; i < n_children; i++)
            node->children[i] = va_arg(child_args, node_t*);
        va_end(child_args);
    }

    return node;
}
// Append an element to the given LIST node, returns the list node.
//
// list_node: the node to append to; must have type LIST (checked with assert)
// element:   the new last child; may be NULL
//
// The children array is resized to the smallest power of two that holds the
// new child count. Prints an error and exits with EXIT_FAILURE if the array
// cannot be resized.
node_t* append_to_list_node(node_t* list_node, node_t* element)
{
    assert(list_node->type == LIST);

    // Calculate the minimum size of the new allocation
    size_t min_allocation_size = list_node->n_children + 1;

    // Round up to the next power of two
    size_t new_allocation_size = 1;
    while (new_allocation_size < min_allocation_size)
        new_allocation_size *= 2;

    // Resize through a temporary, so a failed realloc is detected instead of
    // overwriting the only pointer to the old array with NULL
    node_t** resized = realloc(list_node->children, new_allocation_size * sizeof(node_t*));
    if (resized == NULL)
    {
        fprintf(stderr, "append_to_list_node: out of memory\n");
        exit(EXIT_FAILURE);
    }
    list_node->children = resized;

    // Insert the new element and increase child count by 1
    list_node->children[list_node->n_children] = element;
    list_node->n_children++;

    return list_node;
}
// Outputs the entire syntax tree to the terminal.
// The plain indented format is the default. If the environment variable
// GRAPHVIZ_OUTPUT is set, a GraphViz graph in the dot format is printed instead.
void print_syntax_tree(void)
{
    const char* graphviz_requested = getenv("GRAPHVIZ_OUTPUT");

    if (graphviz_requested == NULL)
    {
        node_print(root, 0);
        return;
    }

    graphviz_node_print(root);
}
// Frees all memory held by the syntax tree
void destroy_syntax_tree(void)
{
destroy_subtree(root);
root = NULL;
}
// The rest of this file contains private helper functions used by the above functions
// Prints out the given node and all its children recursively.
//
// node:    the node to print; NULL is printed as "(NULL)"
// nesting: the depth of the node in the tree, used as the number of spaces
//          the line is indented with
//
// Each node is printed on its own line as its type name, followed by its data
// in parentheses for the node types that carry data.
static void node_print(node_t* node, int nesting)
{
    // Indent the line based on how deep the node is in the syntax tree
    printf("%*s", nesting, "");

    if (node == NULL)
    {
        printf("(NULL)\n");
        return;
    }

    printf("%s", NODE_TYPE_NAMES[node->type]);

    // For nodes with extra data, include it in the printout
    switch (node->type)
    {
    case OPERATOR:
        printf(" (%s)", node->data.operator);
        break;
    case IDENTIFIER:
        printf(" (%s)", node->data.identifier);
        break;
    case NUMBER_LITERAL:
        // int64_t is not `long` on every platform; widen to long long to match %lld
        printf(" (%lld)", (long long)node->data.number_literal);
        break;
    case STRING_LITERAL:
        printf(" (%s)", node->data.string_literal);
        break;
    case STRING_LIST_REFERENCE:
        printf(" (%zu)", node->data.string_list_index);
        break;
    default:
        break;
    }

    putchar('\n');

    // Recursively print children, with some more indentation
    for (size_t i = 0; i < node->n_children; i++)
        node_print(node->children[i], nesting + 1);
}
// Frees the memory owned by the given node, but does not touch its children.
//
// Releases the data the node owns according to its type (the identifier of an
// IDENTIFIER node, the string of a STRING_LITERAL node), then the array of
// child pointers, and finally the node itself. The operator string of an
// OPERATOR node is not owned by the node and is left alone.
// Passing NULL is allowed and does nothing.
static void node_finalize(node_t* discard)
{
    if (discard == NULL)
        return;

    switch (discard->type)
    {
    case IDENTIFIER:
        free(discard->data.identifier);
        break;
    case STRING_LITERAL:
        free(discard->data.string_literal);
        break;
    default:
        // The remaining node types hold no heap allocated data
        break;
    }

    // Only the array of pointers is freed here, not the nodes it points to
    free(discard->children);
    free(discard);
}

// Recursively frees the memory owned by the given node, and all its children.
// Passing NULL is allowed and does nothing, which also covers NULL entries
// in a node's list of children.
static void destroy_subtree(node_t* discard)
{
    if (discard == NULL)
        return;

    // Destroy the children first: node_finalize frees the array listing them
    for (size_t i = 0; i < discard->n_children; i++)
        destroy_subtree(discard->children[i]);

    node_finalize(discard);
}
// Definition of the global string array NODE_TYPE_NAMES
// NODE_TYPE is defined to stringify its argument, so including nodetypes.h here
// expands to one string literal per node type, in the order they are listed there.
const char* NODE_TYPE_NAMES[NODE_TYPE_COUNT] = {
#define NODE_TYPE(node_type) #node_type
#include "nodetypes.h"
};

57
ps2/src/tree.h Normal file
View File

@@ -0,0 +1,57 @@
#ifndef TREE_H
#define TREE_H
#include <stdint.h>
#include <stdlib.h>
// Create the node_type_t enum containing all node types defined in nodetypes.h
// NODE_TYPE is defined to expand to its bare argument, so every NODE_TYPE(x) line
// in nodetypes.h becomes the enumerator x.
typedef enum
{
#define NODE_TYPE(node_type) node_type
#include "nodetypes.h"
NODE_TYPE_COUNT // Listed last, so its value is the number of node types above it
} node_type_t;
// Array containing human-readable names for all node types
// Indexed by node_type_t. Defined in tree.c
extern const char* NODE_TYPE_NAMES[NODE_TYPE_COUNT];
// This is the tree node structure for the abstract syntax tree
// A node has a type, a list of child nodes, and optionally one piece of data.
typedef struct node
{
node_type_t type; // Decides what the node represents, and which data field is active
struct node** children; // An owned list of pointers to child nodes
size_t n_children; // The length of the list of child nodes
// At most one of the data fields can be used at once.
// The node's type decides which field is active, if any
union
{
const char* operator; // pointer to constant string, such as "+". Not owned
char* identifier; // owned heap allocation. The identifier as a string
int64_t number_literal; // the literal integer value
char* string_literal; // owned heap allocation. Includes the surrounding "quotation marks"
size_t string_list_index; // position in global string list
} data;
} node_t;
// Global root for parse tree and abstract syntax tree
// Defined in tree.c, and assigned by the parser's "program" rule
extern node_t* root;
// The node creation function, used by the parser
// n_children is the number of node_t* arguments passed after it
node_t* node_create(node_type_t type, size_t n_children, ...);
// Append an element to the given LIST node, returns the list node
// The given node must have type LIST; this is checked with assert
node_t* append_to_list_node(node_t* list_node, node_t* element);
// Outputs the entire syntax tree to the terminal
// Prints GraphViz dot output instead if the environment variable GRAPHVIZ_OUTPUT is set
void print_syntax_tree(void);
// Cleans up the entire syntax tree
// Sets root to NULL afterwards
void destroy_syntax_tree(void);
// Special function used when syntax trees are output as graphviz graphs.
// Implemented in graphviz_output.c
void graphviz_node_print(node_t* root);
#endif // TREE_H

54
ps2/src/vslc.c Normal file
View File

@@ -0,0 +1,54 @@
#include "vslc.h"
#include <getopt.h>
// Set by the -t option: when true, main prints the syntax tree after parsing
static bool print_full_tree = false;
// Help text printed by the -h option
static const char* usage = "Compiler for VSL. The input program is read from stdin."
"\n"
"Options:\n"
"\t -h \tOutput this text and exit\n"
"\t -t \tOutput the abstract syntax tree\n";
// Command line option parsing.
//
// argc, argv: the arguments given to main
//
// Recognized options:
//   -h  print the usage text to stdout and exit with EXIT_SUCCESS
//   -t  request that the syntax tree is printed (sets print_full_tree)
// Running without any arguments, or with an unrecognized option, prints a
// hint to stderr and exits with EXIT_FAILURE.
static void options(int argc, char** argv)
{
    if (argc == 1)
    {
        fprintf(stderr, "%s: expected at least one option. See -h for help\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    // getopt returns -1 once every option has been consumed
    int option;
    while ((option = getopt(argc, argv, "ht")) != -1)
    {
        switch (option)
        {
        case 'h':
            printf("%s:\n%s", argv[0], usage);
            exit(EXIT_SUCCESS);
        case 't':
            print_full_tree = true;
            break;
        default: // Unrecognized option
            fprintf(stderr, "%s: See -h for help\n", argv[0]);
            exit(EXIT_FAILURE);
        }
    }
}
// Entry point.
// Handles the command line options, parses the program, prints the syntax
// tree if that was requested with -t, and finally frees the tree.
int main(int argc, char** argv)
{
    options(argc, argv);

    // Generated from grammar/bison, constructs syntax tree
    yyparse();
    // Free buffers used by flex
    yylex_destroy();

    // Operations in tree.c
    if (print_full_tree)
    {
        print_syntax_tree();
    }
    destroy_syntax_tree();

    return 0;
}

20
ps2/src/vslc.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef VSLC_H
#define VSLC_H

// Common header included by every source file of the compiler: the standard
// headers used throughout, the syntax tree, and the parser/scanner entry points.
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Definition of the tree node type, and functions for handling the parse tree
#include "tree.h"

// The main driver function of the parser generated by bison
// Declared with (void): in C17 an empty parameter list is not a prototype
int yyparse(void);

// A "hidden" cleanup function in flex
int yylex_destroy(void);

#endif // VSLC_H

View File

@@ -0,0 +1,14 @@
// Test program for global arrays: declaring them, indexing them on both sides
// of an assignment, and using a function call as an array index.
var array[3], other[20]
// Adds up the three elements of the global array
func sum() {
return array[0] + array[1] + array[2]
}
func main() {
array[0] = 5
array[1] = 1
array[2] = array[1]
// With the values above sum() is 5 + 1 + 1, so this assigns other[7] = 7
other[sum()] = sum()
println("Should be 7:", other[7])
}

View File

@@ -0,0 +1,27 @@
// Test program for if statements: without else, with else, an else-if chain
// using blocks, and nested ifs sharing a single else.
func main(a, b) {
if (1)
println("Always")
if (0)
println("Never")
else
println("This, however!")
// An else-if chain where every branch is a block
if (a > b) {
println(a, ">", b)
}
else if (a < b) {
println(a, "<", b)
}
else {
println(a, "=", b)
}
// Now test dangling else
if (a)
if (b)
println("a & b")
else
println("a, but not b")
}

View File

@@ -0,0 +1,34 @@
// Test program for expressions: arithmetic, unary minus and !, comparisons,
// and / or, the ternary ? : operator, and parenthesized subexpressions.
func main() {
var a = 1
var b = 4 - 1 - 1
var c = -a * b + 4 * a * 1 + 2
a = a / 2 + 1
if (a * a < b + c)
println("Yes")
if (a == 1 or b == 2)
println("Either!")
if (a == 1 and b == 2)
println("Both!")
if (!a == 0)
println("Yup")
// Comparisons combined with == and != without parentheses
if (a <= b == b > a)
print("Alawys!")
if (a <= b != a >= b)
println("a != b")
if (a == b == (a != b))
println("Never!")
// The ternary operator with an "or" condition
a = a > 4 or b < 2 ? a + 4 : b - 2
// Two unary minus operators in a row
c = (a <= b) + --(a >= b)
return c
}

View File

@@ -0,0 +1,12 @@
// Test program for variables: a local declaration inside a function, and a
// global variable that is used before its declaration appears in the file.
func main() {
var localVariable
globalVariable = 10
localVariable = 3
println("Global:", globalVariable, " local:", localVariable)
}
// Global variables can be declared anywhere in the file
var globalVariable

View File

@@ -0,0 +1,16 @@
// Test program for function calls: nested calls used as arguments, and calls
// to functions that are defined further down in the file.
func main() {
// second(5, 7) is 7 and first(2, 9) is 2, so this is identity(first(7, 2))
print("Should be 7:", identity(first(second(5, 7), first(2, 9))))
}
// Returns its only argument
func identity(argument) {
return argument
}
// Returns the first of its two arguments
func first(a, b) {
return a
}
// Returns the second of its two arguments
func second(a, b) {
return b
}

View File

@@ -0,0 +1,5 @@
// Minimal test program: one print statement followed by one println statement.
func main() {
print("Hello ")
println("World!")
}

View File

@@ -0,0 +1,30 @@
// Test program for declarations and scopes: local declarations with and without
// initializers, declarations after statements, blocks as statements, and a
// variable declared with the same name as one in an enclosing block.
var global, myArray[10]
func main() {
var a, b = 2
a = 5
var c
global = 3
// A block is itself a statement
{
var d = a + b * c
println("d:", d)
}
if (1) {
var x
x = a*b*c
if (x > 0) {
// Declare a new x, shadowing the outer x
var x
x = a + global
} else {
x = a
myArray[global] = 2
}
println("x:", x)
}
}

View File

@@ -0,0 +1,20 @@
// Test program for loops: a while loop with a condition, and a while (1) loop
// that is left with break.
// Prints the value it is called with
func callMe(i) {
println("i is now", i)
}
func main(start, end) {
// Count upwards from start, stopping before end
var counter = start
while (counter < end) {
callMe(counter)
counter = counter + 1
}
// Go down again using while 1 + break
while (1) {
counter = counter - 1
if (counter < start)
break
callMe(counter)
}
}