From 19e1a8d17bde98527ec4f9bc7cab9751937adf46 Mon Sep 17 00:00:00 2001 From: User <> Date: Wed, 25 Apr 2018 19:43:30 +0200 Subject: [PATCH] included array offsets, added tests, updated report and highlighting --- doc/report-description.tex | 81 ++++++++++--------- doc/report-software.tex | 18 +++-- doc/report-test-program.tex | 10 +-- doc/report-tests.tex | 3 +- src/pp/s1184725/boppi/Annotations.java | 2 +- src/pp/s1184725/boppi/BoppiChecker.java | 11 ++- src/pp/s1184725/boppi/BoppiGenerator.java | 66 ++++++++------- src/pp/s1184725/boppi/antlr/Boppi.g4 | 2 +- src/pp/s1184725/boppi/memlib.iloc | 2 + src/pp/s1184725/boppi/messages.properties | 2 + src/pp/s1184725/boppi/stdlib.iloc | 47 ++++++----- src/pp/s1184725/boppi/test/ArrayTest.java | 38 +++++++-- .../boppi/test/programs/arrayFunctions.boppi | 8 +- .../boppi/test/programs/complexArray.boppi | 10 +-- .../test/programs/fibonacciRecursive.boppi | 2 +- .../boppi/test/programs/simpleArray.boppi | 4 +- src/pp/s1184725/boppi/type/FunctionType.java | 8 +- util/PygmentBoppiLexer.py | 8 +- util/PygmentILOCLexer.py | 24 ++++++ 19 files changed, 218 insertions(+), 128 deletions(-) create mode 100644 util/PygmentILOCLexer.py diff --git a/doc/report-description.tex b/doc/report-description.tex index 4ea54f2..5d69182 100644 --- a/doc/report-description.tex +++ b/doc/report-description.tex @@ -92,7 +92,7 @@ The compound expression simply generates its inner expressions in order. 
See \cr \hfill \begin{subfigure}{0.7\textwidth} \caption{Generated ILOC} - \begin{minted}{boppi} + \begin{minted}{iloc} loadI 5 => r_1 // 5 loadI 99 => r_1 // 'c' @@ -220,7 +220,7 @@ c := b := x < y; \hfill \begin{subfigure}{0.7\textwidth} \caption{Generated ILOC} - \begin{minted}{boppi} + \begin{minted}{iloc} loadI 4 => r_1 // 4 addI r_arp,0 => r_2 // add offset store r_1 => r_2 // to x @@ -310,7 +310,7 @@ When reading values to variables, the generator first reads from the standard in \begin{figure} \caption{ILOC for printing a single character stored in register r.} \label{character-output} - \begin{minted}{boppi} + \begin{minted}{iloc} cpush r loadI 1 => r_t push r_t @@ -322,7 +322,7 @@ cout "" \begin{figure} \caption{\emph{stdlib} ILOC for writing a boolean.} \label{boolean-output} - \begin{minted}{boppi} + \begin{minted}{iloc} // write a boolean to output // stack: [return address, bool] -> [] stdbout: pop => m_1 // get boolean @@ -341,7 +341,7 @@ sbout_e: pop => m_1 // load return address \begin{figure} \caption{\emph{stdlib} ILOC for reading a single character.} \label{character-input} - \begin{minted}{boppi} + \begin{minted}{iloc} // read a character from input // stack: [return address] -> [char] stdcin: cin "" // get line @@ -461,7 +461,7 @@ fi \hfill \begin{subfigure}{0.7\textwidth} \caption{Generated ILOC} - \begin{minted}{boppi} + \begin{minted}{iloc} loadI 2 => r_1 // 2 loadI 1 => r_2 // 1 cmp_GT r_1,r_2 => r_1 // > @@ -493,7 +493,7 @@ od \hfill \begin{subfigure}{0.7\textwidth} \caption{Generated ILOC} - \begin{minted}{boppi} + \begin{minted}{iloc} jumpI -> while_f1 // to condition while_t0: nop // loop target loadI 1 => r_1 // 1 @@ -661,8 +661,8 @@ print(successor(x)); \section{Arrays} \label{arrays} \paragraph{Syntax} -Arrays add new syntax in three places in the language. It introduces a way to construct an array type of any type and two ways two construct an array. 
The first way to construct an array is providing an array literal: \verb|[ element1, element2, ... ]|. The second way is to provide the element type and the number of elements: \verb|array( type, length )| where length can be any integer expression. The choice was made to require the element type, because it allows the type checking to only use a synthesized attribute. For the same reason, an array literal must contain at least one item. Lastly, arrays introduce two variable constructions: the array element accessor and the property accessor. The ANTLR rules can be seen in \cref{arrays-syntax}.\\ -The way arrays are declared and defined is contrary to the assignment. While arrays were defined as fixed-size vectors in a previous iteration of the language, this was considered too restrictive in practice. +Arrays add new syntax in three places in the language. It introduces a way to construct an array type of any type and two ways to construct an array. The first way to construct an array is providing an array literal: \verb|[ element1, element2, ... ]|. The second way is to provide the element type, the number of elements and an offset: \verb|array( type, length, offset )| where length and offset can be any integer expression. The choice was made to require the element type, because it allows the type checking to only use a synthesized attribute. For the same reason, an array literal must contain at least one item. Lastly, arrays introduce two variable constructions: the array element accessor and the property accessor. The ANTLR rules can be seen in \cref{arrays-syntax}.\\ +The way arrays are declared and defined is contrary to the project assignment. While arrays were defined as fixed-size vectors in a previous iteration of the language, this was considered too restrictive in practice. \begin{figure} \caption{ANTLR4 code for arrays in Boppi.} @@ -670,7 +670,7 @@ The way arrays are declared and defined is contrary to the assignment. 
While arr \begin{minted}{antlr} expr : ... - | ARRAY PAROPEN type LISTDELIM size=expr PARCLOSE #defineArray + | ARRAY PAROPEN type LISTDELIM size=expr LISTDELIM offset=expr PARCLOSE #defineArray | ARROPEN expr (LISTDELIM expr)* ARRCLOSE #literalArray type @@ -717,52 +717,55 @@ od \paragraph{Use} Array variables are not assigned at declaration. As such, the user should heed a warning that a variable may not be assigned, since their value may point anywhere. Moreover, an array may contain undefined elements, which neither the compiler nor the run-time will detect.\\ -Array variables have exactly one named property, their \verb|length|. This is always non-negative for assigned arrays and undefined otherwise. All other types up to here have no properties.\\ -An array literal may contain any positive number of elements, which must all have the same type. The type of the resulting array is, naturally, an array of the elements' type and the length is equal to the number of expressions in the literal.\\ -An array constructor comprises a type, which will be the type of the elements, and a non-negative number of items. Note that the elements will be undefined.\\ +Array variables have exactly two named properties, their \verb|length| and \verb|offset|. The length is always non-negative for assigned arrays and undefined otherwise. The offset is always zero for an array literal and undefined for an unassigned array. All other types up to here have no properties.\\ +An array literal may contain any positive number of elements, which must all have the same type. The type of the resulting array is, naturally, an array of the elements' type. Its length is equal to the number of expressions in the literal and its offset is zero.\\ +An array constructor comprises a type, which will be the type of the elements, a non-negative number of items and an offset. Note that the elements will be undefined.\\ An array accessor may only be used on an array type. 
The result type will be the element type of the array.\\ Arrays can be compared with each other for equality if they have the same element type.\\ Lastly, an array of characters can be printed to standard output and can be read from standard input.\\ Note that, while an array variable may be defined constant, its elements can still be changed. \paragraph{Semantics} -An array is a finite sequence of items of a single type whose values can be retrieved through a zero-based index. An array literal creates an array exactly large enough to hold all the expressions inside, then evaluates the expressions left-to-right and puts the results in the corresponding array index. An array constructor simply evaluates the requested length and allocates an array of that length, or halts the machine if the length is negative.\\ -Assigning an array to a variable means that variable will point to the array from that point. This means an expression like \verb|array1 := array2;| will result in both variables pointing to the same array, so changing an element of \verb|array1| will change the element for \verb|array2|.\\ +An array is a finite sequence of items of a single type whose values can be retrieved through an index. An array literal creates an array exactly large enough to hold all the expressions inside, then evaluates the expressions left-to-right and puts each result in the respective array index. An array constructor simply evaluates the requested length and offset and allocates an array of that length, or halts the machine if the length is negative.\\ +Assigning an array to a variable means that variable will point to the array from that moment. 
This means an expression like \verb|array1 := array2;| will result in both variables pointing to the same array, so changing an element of \verb|array1| will change the element for \verb|array2|.\\ An array accessor evaluates the index expression and then returns the element at that index, or halts the machine if that index is out of bounds.\\ An equality check between two arrays compares the length and each element of the array. Note that, for nested arrays, this will compare the addresses of the inner arrays, rather than the length and values within those arrays. \paragraph{Code generation} -The array constructor first evaluates the expression, then generates a check whether the array size is valid and either \verb|halt|s the machine or allocates the array.\\ -An array literal generates the allocation of the array, then, for each expression, evaluates it and puts the result in the array using \verb|storeAI r_res => r_array, c_offset|, with \verb|r_res| the result of the expression, \verb|r_array| the base address of the array and \verb|c_offset| the offset of the particular element calculated at compile-time.\\ -An array access generates the following steps (illustrated in \cref{arrays-access-snippet}): +An array is stored as a contiguous block of data. It comprises a header of two integers (length and offset) followed by a body containing the array elements.\\ +The array constructor (\verb|array(type, length, offset)|) first evaluates the expression, then generates a check whether the array size is valid and either \verb|halt|s the machine or allocates the array. After allocating the array, the offset is evaluated and both the length and offset are stored in the array.\\ +An array literal (\verb|[element,element,..]|) generates the allocation of the array, then, for each expression, evaluates it and puts the result in the array using \verb|storeAI r_res => r_array, c_offset|. 
Here, \verb|r_res| is the result of the expression, \verb|r_array| the base address of the array and \verb|c_offset| the offset of the particular element calculated at compile-time. This offset starts at \verb|2*INT_SIZE| to accommodate for the array headers, and is incremented with the element size.\\ +An array access (\verb|arr[index]|) generates the following steps (illustrated in \cref{arrays-access-snippet}): \begin{enumerate} - \item Visit the array variable. - \item Load the array's address. - \item Visit the index expression. - \item Load the array's memory size and divide it by the element size. - \item Check whether the calculated index is less than the array's size and not negative. Halt if this is not the case. - \item Multiply the index by the element size to get the offset and add it to the array's base address to get the address of the element. + \item Visit the array variable + \item Load the array's address + \item Visit the index expression + \item Load the array's offset and subtract it from the calculated index + \item Load the array's length and check whether the new index is between zero (inclusive) and the length (exclusive). Halt if this is not the case. + \item Multiply the new index by the element size to get the offset and add it to the array's body address to get the address of the element. \end{enumerate} -Retrieving the length of an array requires a few steps because the length is only stored implicitly. The generator first retrieves the array variable. Then it produces a \verb|load r_temp => r_temp| instruction to get the array's base address, followed by a \verb|addI r_temp, OFFSET_OBJECT_SIZE => r_temp| and \verb|load r_temp => r_temp| to retrieve the memory size. Lastly, it produces \verb|divI r_temp, c_element_size => r_temp| to convert the size to the number of elements. 
+Retrieving the length and offset of an array is achieved by first retrieving the array's address and then adding the respective offset of the length (\verb|OFFSET_ARRAY_LENGTH|) or index offset (\verb|OFFSET_ARRAY_OFFSET|). \begin{figure} - \caption{Array access snippet from \cref{arrays-code}.} + \caption{Array access snippet from \cref{arrays-code}. The array reference is stored at \texttt{r\_arp,0} and the requested index is \texttt{7}.} \label{arrays-access-snippet} - \begin{minted}{boppi} - addI r_arp,0 => r_2 // add offset - load r_2 => r_2 // get array object - loadI 0 => r_3 // 0 - loadAI r_2,-4 => r_1 // check array index - divI r_1,4 => r_1 // check array index - cmp_LT r_3,r_1 => r_1 // check array index - cmp_GE r_3,r_nul => r_4 // check array index - and r_1,r_4 => r_4 // check array index - cbr r_4 -> nob5,oob4 // check array index -oob4: haltI 1634692962 // array index out of bounds -nob5: multI r_3,4 => r_3 // multiply index by size - add r_2,r_3 => r_2 // get array index address + \begin{minted}{iloc} + addI r_arp,0 => r_2 // add offset + load r_2 => r_2 // get array object + loadI 7 => r_3 // 7 + loadAI r_2,4 => r_1 // load array offset + sub r_3,r_1 => r_3 // subtract array offset + loadAI r_2,0 => r_1 // load array length + cmp_LT r_3,r_1 => r_1 // check array index + cmp_GE r_3,r_nul => r_4 // check array index + and r_1,r_4 => r_4 // check array index + cbr r_4 -> nob5,oob4 // check array index +oob4: haltI 1634692962 // array index out of bounds +nob5: multI r_3,4 => r_3 // multiply index by size + addI r_3,8 => r_3 // point to array body + add r_2,r_3 => r_2 // get array index address \end{minted} \end{figure} diff --git a/doc/report-software.tex b/doc/report-software.tex index 5fc3275..35ad99f 100644 --- a/doc/report-software.tex +++ b/doc/report-software.tex @@ -10,11 +10,11 @@ The compiler chain is written in Java mostly, with a preamble (\emph{memlib}) wr \section{Toolchain helper} -The toolchain helper \emph{pp.s1184725.boppi.ToolChain} 
contains various helper methods for compiling and executing programs. Notably, a \verb|Logger| object is required for nearly all methods. This way warnings and errors can be reported instead of throwing exceptions or failing silently.\\ +The toolchain helper \emph{pp.s1184725.boppi.ToolChain} contains various helper methods for compiling and executing programs. Notably, a \verb|Logger| object is required for nearly all methods. The stages of the compiler try to walk through the source code best-effort and report warnings and errors via this logger.\\ Moreover, the helper contains a method to print the abstract syntax tree (\emph{AST}) of a Boppi program as a graphviz graph. The AST can be produced at any point in the compilation process. After the checking phase and the generating phase the AST will be annotated with types, variables and registers used.\\ -The helper also provides a method to modify a \verb|Logger| object to append logged items to a list instead of the standard output. This can be useful for collecting problems and displaying them in a window or file and for test automation.\\ +The helper also provides a method to modify a \verb|Logger| object to collect a list of errors rather than printing them to the standard output. This can be useful for collecting problems and displaying them in a window or for test automation.\\ @@ -23,9 +23,9 @@ The helper also provides a method to modify a \verb|Logger| object to append log \section{Checker} The correctness checker \emph{pp.s1184725.boppi.BoppiChecker} performs type checking, binding identifiers to variables, checking constants are assigned once and checking variables are assigned before being used. This is done on a bare parse tree of a Boppi program.\\ -The checker is implemented as a tree visitor, since it allows to change state between visiting different children of a node. This is advantageous for e.g. 
the if-then expression in which a scope has to be opened between the test and the conditional code (see \cref{conditionals}).\\ +The checker is implemented as a tree visitor, since it allows to change state between visiting different children of a node. This is advantageous for keeping, for example, the if-then expression concise. With a visitor, a scope can be opened between the test and the conditional code (see \cref{conditionals}) while using a single, action-less ANTLR rule. With a listener this would require either an action in the ANTLR rule or a sub-rule for opening a scope.\\ -The only inherited attributes during checking are the booleans \verb|inLhs| and \verb|inType|. These are implemented as local variables rather than rule attributes. \verb|inLhs| tracks whether a variable is being assigned or is used in an expression. This information is used to decide whether a constant is assigned a value twice and whether a variable is used before being initialized. \verb|inType| tracks whether a variable is used in a type-level expression, in which it may be used regardless of whether it is initialized.\\ +The only inherited attributes during checking are the booleans \verb|inLhs| and \verb|inType|. These are implemented as local variables rather than rule attributes. \verb|inLhs| tracks whether a variable is being assigned or is used in an expression. This information is used to decide at compile time whether a constant is assigned a value twice and whether a variable is used before being initialized. \verb|inType| tracks whether a variable is used in a type-level expression, in which it may be used regardless of whether it is initialized.\\ The synthesised attributes during checking are the type of a node (\verb|Annotations::types|) and, when applicable, the variable belonging to an identifier (\verb|Annotations::variables|) and the local variables of a function (\verb|Annotations::function|). 
The latter are only used in the generating phase.\\ @@ -62,7 +62,7 @@ public Reg visitInfix2(Infix2Context ctx) { \end{subfigure} \hfill \begin{subfigure}{0.2\textwidth} - \begin{minted}{boppi} + \begin{minted}{iloc} loadI 32 => r loadI 10 => g add r, g => r @@ -110,6 +110,14 @@ function main() { +\section{FunctionScope} +The lexical scope class \emph{pp.s1184725.boppi.FunctionScope} contains local variables within a function. An object is created with a given lexical depth, which can be retrieved at any time.\\ +The \verb|FunctionScope::addVariable| method produces a variable of the provided type at the FunctionScope's lexical depth and current offset. This variable is both recorded in the object and returned. This method is used by the symbol table to produce a variable for each symbol.\\ +The generator uses the function scope to determine how large the local data size for a function has to be and to allocate and deallocate objects where applicable. + + + + \section{FunctionScope} The lexical scope class \emph{pp.s1184725.boppi.FunctionScope} contains local variables within a function. An object is created with a given lexical depth, which can be retrieved at any time.\\ The \verb|FunctionScope::addVariable| method produces a variable of the provided type at the FunctionScope's lexical depth and current offset. This variable is both recorded in the object and returned. This method is used by the symbol table to produce a variable for each symbol.\\ diff --git a/doc/report-test-program.tex b/doc/report-test-program.tex index 76fbf2d..af56d2b 100644 --- a/doc/report-test-program.tex +++ b/doc/report-test-program.tex @@ -11,7 +11,7 @@ The program works by repeatedly asking for a number. 
If the user provides a posi \begin{minted}{boppi} function (int)->int memoizedFib() { var int[] memo; - memo := array(int, 50); + memo := array(int, 50, 0); function int fib(int n) { if n < 1 || n > 46 then @@ -46,7 +46,7 @@ od; \caption{Examples of input and output on \emph{fibonacciRecursive}.} \label{test-example-runs} \begin{subfigure}{0.2\textwidth} - \begin{minted}{boppi} + \begin{minted}{text} > 1 <<< 1 > 0 @@ -54,13 +54,13 @@ od; \end{subfigure} \hfill \begin{subfigure}{0.2\textwidth} - \begin{minted}{boppi} + \begin{minted}{text} > -5 \end{minted} \end{subfigure} \hfill \begin{subfigure}{0.2\textwidth} - \begin{minted}{boppi} + \begin{minted}{text} > 3 <<< 2 > 4 @@ -74,7 +74,7 @@ od; \end{subfigure} \hfill \begin{subfigure}{0.2\textwidth} - \begin{minted}{boppi} + \begin{minted}{text} > 46 <<< 1836311903 > 47 diff --git a/doc/report-tests.tex b/doc/report-tests.tex index 901b572..e2ab52a 100644 --- a/doc/report-tests.tex +++ b/doc/report-tests.tex @@ -80,9 +80,10 @@ Arrays are tested for: \begin{itemize} \item correctly parsing array types and nested arrays \item correctly parsing array accessors - \item correctly parsing variable properties (\emph{array.length}) + \item correctly parsing variable properties (\emph{array.length}, \emph{array.offset}) \item correctly checking array literals \item correctly checking array constructors and assigning them to arrays + \item correctly rejecting an assignment to an array property \item correctly returning the element type of an array access \item correctly performing a bounds check on an array access during run-time \item correctly performing equality checks between arrays diff --git a/src/pp/s1184725/boppi/Annotations.java b/src/pp/s1184725/boppi/Annotations.java index 1296e7a..a0b30c5 100644 --- a/src/pp/s1184725/boppi/Annotations.java +++ b/src/pp/s1184725/boppi/Annotations.java @@ -37,7 +37,7 @@ public class Annotations { /** * Maps variable instances to the AST node they reside in. 
*/ - public Map,ParserRuleContext> variableRoot; + public Map, ParserRuleContext> variableRoot; /** * Creates a new annotations object with empty maps. diff --git a/src/pp/s1184725/boppi/BoppiChecker.java b/src/pp/s1184725/boppi/BoppiChecker.java index bc33b46..4ee18d0 100644 --- a/src/pp/s1184725/boppi/BoppiChecker.java +++ b/src/pp/s1184725/boppi/BoppiChecker.java @@ -182,7 +182,7 @@ public class BoppiChecker extends BoppiBaseVisitor { parameterTypes = TupleType.UNIT; Type returnType = ctx.result != null ? visit(ctx.result) : SimpleType.VOID; - FunctionType type = new FunctionType(returnType, parameterTypes); + FunctionType type = new FunctionType(parameterTypes, returnType); Variable func = an.symbols.put(ctx.name.getText(), type); func.setConstant(true); @@ -208,6 +208,7 @@ public class BoppiChecker extends BoppiBaseVisitor { @Override public Type visitDefineArray(DefineArrayContext ctx) { checkConstraint(visit(ctx.size), SimpleType.INT, ctx); + checkConstraint(visit(ctx.offset), SimpleType.INT, ctx); return new ArrayType(visit(ctx.type())); } @@ -256,6 +257,9 @@ public class BoppiChecker extends BoppiBaseVisitor { Type rht = visit(ctx.rhs); checkConstraint(lht, rht, ctx); + if (lht instanceof ArrayType && (((ArrayType) lht).getType() instanceof ArrayType)) + log.warning(getError(ctx, Messages.getString("BoppiChecker.17"), lht, rht)); //$NON-NLS-1$ + switch (ctx.op.getType()) { case BoppiLexer.LT: case BoppiLexer.LEQ: @@ -435,7 +439,10 @@ public class BoppiChecker extends BoppiBaseVisitor { String prop = ctx.IDENTIFIER().getText(); if (varType instanceof ArrayType) { - if (prop.equals("length")) { //$NON-NLS-1$ + if (prop.equals("length") || prop.equals("offset")) { //$NON-NLS-1$ //$NON-NLS-2$ + if (inLhs) + log.severe(getError(ctx, Messages.getString("BoppiChecker.19"))); //$NON-NLS-1$ + return SimpleType.INT; } else { log.severe(getError(ctx, Messages.getString("BoppiChecker.9"), prop)); //$NON-NLS-1$ diff --git a/src/pp/s1184725/boppi/BoppiGenerator.java 
b/src/pp/s1184725/boppi/BoppiGenerator.java index 875f819..b7e532a 100644 --- a/src/pp/s1184725/boppi/BoppiGenerator.java +++ b/src/pp/s1184725/boppi/BoppiGenerator.java @@ -33,7 +33,7 @@ public class BoppiGenerator extends BoppiBaseVisitor { private static final Num OFFSET_ARP = new Num(-4), OFFSET_RETURN_ADDR = new Num(-8), OFFSET_RETURN_VAL = new Num(-12), OFFSET_AL = new Num(-16), OFFSET_FUNCREF_ADDR = ZERO, OFFSET_FUNCREF_ARP = new Num(4), OFFSET_FUNCREF_ARSIZE = new Num(8), OFFSET_REF_COUNT = new Num(-8), - OFFSET_REF_SIZE = new Num(-4); + OFFSET_REF_SIZE = new Num(-4), OFFSET_ARRAY_LENGTH = ZERO, OFFSET_ARRAY_OFFSET = new Num(4); /** * Unknown type used in a read expression */ @@ -618,18 +618,21 @@ public class BoppiGenerator extends BoppiBaseVisitor { @Override public Reg visitDefineArray(DefineArrayContext ctx) { Type elementType = ((ArrayType) an.types.get(ctx)).getType(); - Reg arrSize = visit(ctx.size); + Reg arrLength = visit(ctx.size); - emit("produce array size", OpCode.multI, arrSize, new Num(elementType.getSize()), arrSize); //$NON-NLS-1$ + return regPool.blockReg(arrLength, () -> regPool.withReg((arrSize, addr) -> { + Label validLength = makeLabel("aszt"), invalidLength = makeLabel("aszf"); //$NON-NLS-1$ //$NON-NLS-2$ + emit("check length non-negative", OpCode.cmp_GE, arrLength, RegisterPool.ZERO, arrSize); //$NON-NLS-1$ + emit("", OpCode.cbr, arrSize, validLength, invalidLength); //$NON-NLS-1$ + emit("invalid array size", invalidLength, OpCode.haltI, ERROR_ILLEGAL_ARRAY_SIZE); //$NON-NLS-1$ + emit("valid array size", validLength, OpCode.multI, arrLength, new Num(elementType.getSize()), arrSize); //$NON-NLS-1$ - return regPool.blockReg(arrSize, () -> regPool.withReg((temp) -> { - Label validSize = makeLabel("aszt"), invalidSize = makeLabel("aszf"); //$NON-NLS-1$ //$NON-NLS-2$ - emit("check size non negative", OpCode.cmp_GE, arrSize, RegisterPool.ZERO, temp); //$NON-NLS-1$ - emit("", OpCode.cbr, temp, validSize, invalidSize); //$NON-NLS-1$ - 
emit("invalid array size", invalidSize, OpCode.haltI, ERROR_ILLEGAL_ARRAY_SIZE); //$NON-NLS-1$ - emit("valid array size", validSize, OpCode.nop); //$NON-NLS-1$ + emit("reserve array properties", OpCode.addI, arrSize, new Num(2 * Machine.INT_SIZE), arrSize); //$NON-NLS-1$ - malloc(temp, arrSize); + malloc(addr, arrSize); + + emit("store length", OpCode.storeAI, arrLength, addr, OFFSET_ARRAY_LENGTH); //$NON-NLS-1$ + emit("store offset", OpCode.storeAI, visit(ctx.offset), addr, OFFSET_ARRAY_OFFSET); //$NON-NLS-1$ })); } @@ -654,15 +657,6 @@ public class BoppiGenerator extends BoppiBaseVisitor { incrementReference(type, result); }); - if (ctx.variable() instanceof VariablePropertyContext) { - VariablePropertyContext vpctx = (VariablePropertyContext) ctx.variable(); - Type varType = an.types.get(vpctx.variable()); - - if (varType instanceof ArrayType) - emit("divide by element size", OpCode.divI, result, //$NON-NLS-1$ - new Num(((ArrayType) varType).getType().getSize()), result); - } - return result; } @@ -799,15 +793,22 @@ public class BoppiGenerator extends BoppiBaseVisitor { @Override public Reg visitLiteralArray(LiteralArrayContext ctx) { - int elements = ctx.expr().size(); + int count = ctx.expr().size(); int elementSize = ((ArrayType) an.types.get(ctx)).getType().getSize(); return regPool.withReg((addr) -> { - malloc(addr, elements * elementSize); + malloc(addr, count * elementSize + 2 * Machine.INT_SIZE); - for (int i = 0; i < elements; i++) { + regPool.withReg((temp) -> { + emit("load array length", OpCode.loadI, new Num(count), temp); //$NON-NLS-1$ + emit("store array length", OpCode.storeAI, temp, addr, OFFSET_ARRAY_LENGTH); //$NON-NLS-1$ + }); + + emit("store array offset", OpCode.storeAI, RegisterPool.ZERO, addr, OFFSET_ARRAY_OFFSET); //$NON-NLS-1$ + + for (int i = 0; i < count; i++) { Reg result = visit(ctx.expr(i)); - Num offset = new Num(i * elementSize); + Num offset = new Num(i * elementSize + 2 * Machine.INT_SIZE); if (elementSize == 1) emit("store 
array element", OpCode.cstoreAI, result, addr, offset); //$NON-NLS-1$ @@ -951,8 +952,9 @@ public class BoppiGenerator extends BoppiBaseVisitor { regPool.withReg((r1, r2) -> { Label outOfBounds = makeLabel("oob"), inBounds = makeLabel("nob"); //$NON-NLS-1$ //$NON-NLS-2$ - emit("check array index", OpCode.loadAI, addr, OFFSET_REF_SIZE, r1); //$NON-NLS-1$ - emit("check array index", OpCode.divI, r1, new Num(type.getType().getSize()), r1); //$NON-NLS-1$ + emit("load array offset", OpCode.loadAI, addr, OFFSET_ARRAY_OFFSET, r1); //$NON-NLS-1$ + emit("subtract array offset", OpCode.sub, offset, r1, offset); //$NON-NLS-1$ + emit("load array length", OpCode.loadAI, addr, OFFSET_ARRAY_LENGTH, r1); //$NON-NLS-1$ emit("check array index", OpCode.cmp_LT, offset, r1, r1); //$NON-NLS-1$ emit("check array index", OpCode.cmp_GE, offset, RegisterPool.ZERO, r2); //$NON-NLS-1$ emit("check array index", OpCode.and, r1, r2, r2); //$NON-NLS-1$ @@ -961,6 +963,7 @@ public class BoppiGenerator extends BoppiBaseVisitor { emit("multiply index by size", inBounds, OpCode.multI, offset, new Num(type.getType().getSize()), //$NON-NLS-1$ offset); + emit("point to array body", OpCode.addI, offset, new Num(2 * Machine.INT_SIZE), offset); //$NON-NLS-1$ emit("get array index address", OpCode.add, addr, offset, addr); //$NON-NLS-1$ }); }); @@ -977,11 +980,16 @@ public class BoppiGenerator extends BoppiBaseVisitor { emit("get object address", OpCode.load, addr, addr); //$NON-NLS-1$ if (innerType instanceof ArrayType) { - emit("add size offset", OpCode.addI, addr, OFFSET_REF_SIZE, addr); //$NON-NLS-1$ - return addr; - } else { - return addr; + switch (ctx.IDENTIFIER().getText()) { + case "length": //$NON-NLS-1$ + emit("point to length", OpCode.addI, addr, OFFSET_ARRAY_LENGTH, addr); //$NON-NLS-1$ + break; + case "offset": //$NON-NLS-1$ + emit("point to offset", OpCode.addI, addr, OFFSET_ARRAY_OFFSET, addr); //$NON-NLS-1$ + break; + } } + return addr; } @Override diff --git 
a/src/pp/s1184725/boppi/antlr/Boppi.g4 b/src/pp/s1184725/boppi/antlr/Boppi.g4 index 8c436c1..712453c 100644 --- a/src/pp/s1184725/boppi/antlr/Boppi.g4 +++ b/src/pp/s1184725/boppi/antlr/Boppi.g4 @@ -37,7 +37,7 @@ expr | WHILEOPEN cond=stats WHILETRUE onTrue=stats WHILECLOSE #while | variable PAROPEN (expr (LISTDELIM expr)*)? PARCLOSE #call | variable #getVariable - | ARRAY PAROPEN type LISTDELIM size=expr PARCLOSE #defineArray + | ARRAY PAROPEN type LISTDELIM size=expr LISTDELIM offset=expr PARCLOSE #defineArray | ARROPEN expr (LISTDELIM expr)* ARRCLOSE #literalArray | LITERAL10 #literalInteger | CHAR #literalCharacter diff --git a/src/pp/s1184725/boppi/memlib.iloc b/src/pp/s1184725/boppi/memlib.iloc index 527b361..d728bc0 100644 --- a/src/pp/s1184725/boppi/memlib.iloc +++ b/src/pp/s1184725/boppi/memlib.iloc @@ -5,6 +5,8 @@ off_oref <- -8 off_osize <- -4 off_next <- 0 off_size <- 4 +off_arlen <- 0 +off_aroff <- 4 // memlib - simple memory allocator for ILOC // diff --git a/src/pp/s1184725/boppi/messages.properties b/src/pp/s1184725/boppi/messages.properties index 785f940..64932fc 100644 --- a/src/pp/s1184725/boppi/messages.properties +++ b/src/pp/s1184725/boppi/messages.properties @@ -7,6 +7,8 @@ BoppiChecker.13=Constant '%s' may already be assigned. BoppiChecker.14=Variable '%s' may not be assigned. BoppiChecker.15=Variable '%s' is not assigned. BoppiChecker.16=Cannot make array of void elements +BoppiChecker.17=Elements of '%s' and '%s' will be compared by reference. +BoppiChecker.19=Cannot overwrite array properties BoppiChecker.2=Expected %d arguments but got %d. BoppiChecker.3='%s' is not a function. BoppiChecker.4=Variable must have a type %s, %s, %s or function. 
diff --git a/src/pp/s1184725/boppi/stdlib.iloc b/src/pp/s1184725/boppi/stdlib.iloc index 9133d8d..850a50d 100644 --- a/src/pp/s1184725/boppi/stdlib.iloc +++ b/src/pp/s1184725/boppi/stdlib.iloc @@ -23,13 +23,15 @@ sbout_e: pop => m_1 // load return address // write an array of characters (a string) to output // stack: [return address, address] -> [] stdsout: pop => m_1 // get address - loadAI m_1,@off_osize => m_2 // get length + loadAI m_1,@off_arlen => m_2 // get length + addI m_1,8 => m_1 // point to array body sout_lc: cbr m_2 -> sout_ll,sout_le // check if any character to push sout_ll: subI m_2, 1 => m_2 // iterate backward cloadAO m_1, m_2 => m_c // get character cpush m_c // push character jumpI -> sout_lc // repeat -sout_le: loadAI m_1,@off_osize => m_2 // get length +sout_le: subI m_1,8 => m_1 // point to array header + loadAI m_1,@off_arlen => m_2 // get length push m_2 // push string length cout "" // print string pop => m_1 // get return address @@ -53,27 +55,32 @@ scin_le: loadI 0 => m_0 // reset zero register // read an array of characters (a string) from input -// the memalloc label cannot be used, so address 28 is used instead +// the memalloc label cannot be used when loading libraries as +// separate ILOC programs, so address 28 is used instead // stack: [return address] -> [address] -stdsin: cin "" // get line - pop => m_2 // get length - push m_2 // save length - loadI #ssin_a => m_1 // call malloc - push m_1 // call malloc - push m_2 // call malloc - loadI 28 => m_c // call malloc - jump -> m_c // call malloc -ssin_a: pop => m_1 // get array address - pop => m_2 // get length - i2i m_1 => m_n // load character iterator -ssin_c: cbr m_2 -> ssin_l, ssin_e // pop characters into the array +stdsin: cin "" // get line + pop => m_2 // get length + push m_2 // save length + addI m_2,8 => m_2 // add header size + loadI #ssin_a => m_1 // call malloc + push m_1 // call malloc + push m_2 // call malloc + loadI 28 => m_c // call malloc + jump -> m_c // 
call malloc +ssin_a: pop => m_1 // get array address + pop => m_2 // get length + storeAI m_2 => m_1,@off_arlen // store array length + storeAI m_0 => m_1,@off_aroff // store array offset + i2i m_1 => m_n // load character iterator + addI m_n,8 => m_n // point to array body +ssin_c: cbr m_2 -> ssin_l, ssin_e // pop characters into the array ssin_l: subI m_2,1 => m_2 - cpop => m_c // pop character - cstore m_c => m_n // save character - addI m_n,1 => m_n // increment iterator + cpop => m_c // pop character + cstore m_c => m_n // save character + addI m_n,1 => m_n // increment iterator jumpI -> ssin_c -ssin_e: pop => m_2 // get return address - push m_1 // push array address +ssin_e: pop => m_2 // get return address + push m_1 // push array address jump -> m_2 diff --git a/src/pp/s1184725/boppi/test/ArrayTest.java b/src/pp/s1184725/boppi/test/ArrayTest.java index 525b6b7..8fa4c41 100644 --- a/src/pp/s1184725/boppi/test/ArrayTest.java +++ b/src/pp/s1184725/boppi/test/ArrayTest.java @@ -39,6 +39,9 @@ public class ArrayTest { BoppiTests.parseString("var int[] arr; arr := array(5)"); assertThat(BoppiTests.log, hasSize(1)); + + BoppiTests.parseString("var int[] arr; arr := array(int, 3)"); + assertThat(BoppiTests.log, hasSize(1)); } /** @@ -67,11 +70,20 @@ public class ArrayTest { BoppiTests.checkString("var int a; var int[] arr; a := arr"); assertThat(BoppiTests.log, not(empty())); - BoppiTests.checkString("var int[] arr; arr := array(char, 5)"); + BoppiTests.checkString("var int[] arr; arr := array(char, 5, 0)"); assertThat(BoppiTests.log, not(empty())); BoppiTests.checkString("var int[] arr; arr := ['a', 'c']"); assertThat(BoppiTests.log, not(empty())); + + BoppiTests.checkString("var int[] arr; print(arr.offset)"); + assertThat(BoppiTests.log, not(empty())); + + BoppiTests.checkString("var int[] arr; arr.offset := 3"); + assertThat(BoppiTests.log, not(empty())); + + BoppiTests.checkString("var int[] arr; arr := [1,2]; arr.offset := 3"); + assertThat(BoppiTests.log, 
not(empty())); } /** @@ -91,7 +103,17 @@ public class ArrayTest { */ @Test public void correctArrayGeneration() { - BoppiTests.compileAndRunString("var int n; read(n); var int[] arr; arr := array(int, n); print(arr.length)", "7"); + BoppiTests.compileAndRunString("var int n; read(n); var int[] arr; arr := array(int, n, 0); print(arr.length)", "7"); + assertThat(BoppiTests.vm.getInterrupt(), is(0)); + assertThat(BoppiTests.log, is(empty())); + assertThat(BoppiTests.out, is(arrayContaining("7"))); + + BoppiTests.compileAndRunString("var int n; read(n); var int[] arr; arr := array(int, n, 5); print(arr.length)", "4"); + assertThat(BoppiTests.vm.getInterrupt(), is(0)); + assertThat(BoppiTests.log, is(empty())); + assertThat(BoppiTests.out, is(arrayContaining("4"))); + + BoppiTests.compileAndRunString("var int n; var int[] arr; arr := array(int, 4, 5); arr[8] := read(n); print(arr[arr.offset+3])", "7"); assertThat(BoppiTests.vm.getInterrupt(), is(0)); assertThat(BoppiTests.log, is(empty())); assertThat(BoppiTests.out, is(arrayContaining("7"))); @@ -127,13 +149,19 @@ public class ArrayTest { */ @Test public void wrongArrayGeneration() { - BoppiTests.compileAndRunString("var int[] arr; arr := array(int, 10); arr[10] := 5"); + BoppiTests.compileAndRunString("var int[] arr; arr := array(int, 10, 0); arr[10] := 5"); assertThat(BoppiTests.vm.getInterrupt(), is(not(0))); - BoppiTests.compileAndRunString("var int[] arr; arr := array(int, 10); arr[-1]"); + BoppiTests.compileAndRunString("var int[] arr; arr := array(int, 10, 0); arr[-1]"); assertThat(BoppiTests.vm.getInterrupt(), is(not(0))); - BoppiTests.compileAndRunString("var int[] arr; arr := array(int, -1)"); + BoppiTests.compileAndRunString("var int[] arr; arr := array(int, 10, 1); arr[0]"); + assertThat(BoppiTests.vm.getInterrupt(), is(not(0))); + + BoppiTests.compileAndRunString("var int[] arr; arr := array(int, 10, -20); arr[0]"); + assertThat(BoppiTests.vm.getInterrupt(), is(not(0))); + + 
BoppiTests.compileAndRunString("var int[] arr; arr := array(int, -1, 0)"); assertThat(BoppiTests.vm.getInterrupt(), is(not(0))); } diff --git a/src/pp/s1184725/boppi/test/programs/arrayFunctions.boppi b/src/pp/s1184725/boppi/test/programs/arrayFunctions.boppi index 77451be..5395053 100644 --- a/src/pp/s1184725/boppi/test/programs/arrayFunctions.boppi +++ b/src/pp/s1184725/boppi/test/programs/arrayFunctions.boppi @@ -1,9 +1,9 @@ function char[] concat(char[] a, char[] b) { - var char[] result; result := array(char, a.length+b.length); + var char[] result; result := array(char, a.length+b.length, a.offset); var int i; i := 0; while i < result.length do - result[i] := if i < a.length then a[i] else b[i-a.length] fi; + result[i+result.offset] := if i < a.length then a[i+a.offset] else b[i-a.length+b.offset] fi; i := i+1; od; @@ -11,7 +11,7 @@ function char[] concat(char[] a, char[] b) { }; function char[] substring(char[] s, int start, int stop) { - var char[] result; result := array(char, stop-start); + var char[] result; result := array(char, stop-start, 0); var int i; i := 0; while i < result.length do @@ -53,7 +53,7 @@ function char[] itoa(int n) { }; function char[] strjoin(char[][] strings, char[] sep) { - var char[] result; result := array(char, 0); + var char[] result; result := array(char, 0, 0); var int i; i := 0; while i < strings.length do diff --git a/src/pp/s1184725/boppi/test/programs/complexArray.boppi b/src/pp/s1184725/boppi/test/programs/complexArray.boppi index a3ae053..b07879f 100644 --- a/src/pp/s1184725/boppi/test/programs/complexArray.boppi +++ b/src/pp/s1184725/boppi/test/programs/complexArray.boppi @@ -19,10 +19,10 @@ print(arr3[2][0][1]); // array passing -var int[][] target; target := array(int[], 2); +var int[][] target; target := array(int[], 2, 0); function int[] populate(int start) { - var int[] arr; arr := array(int, 4); + var int[] arr; arr := array(int, 4, 0); var int i; i := 0; while i < 4 do @@ -44,7 +44,7 @@ print(target[1][3]); var 
target target2; target2 := target; -var int[] firstRow; firstRow := array(int, 4); +var int[] firstRow; firstRow := array(int, 4, 0); firstRow := target2[0]; print(firstRow[2]); @@ -53,7 +53,7 @@ print(firstRow[2]); // function array function int[] mapArray((int)->int f, int[] arr) { - var int[] newArr; newArr := array(int, arr.length); + var int[] newArr; newArr := array(int, arr.length, arr.offset); var int i; i := 0; while i < arr.length do newArr[i] := f(arr[i]); @@ -64,7 +64,7 @@ function int[] mapArray((int)->int f, int[] arr) { function int increment(int a) a+1; -var int[] myArray; myArray := array(int, 9); +var int[] myArray; myArray := array(int, 9, 0); i := 0; while i < myArray.length do diff --git a/src/pp/s1184725/boppi/test/programs/fibonacciRecursive.boppi b/src/pp/s1184725/boppi/test/programs/fibonacciRecursive.boppi index 8dfcb9a..ed1a709 100644 --- a/src/pp/s1184725/boppi/test/programs/fibonacciRecursive.boppi +++ b/src/pp/s1184725/boppi/test/programs/fibonacciRecursive.boppi @@ -2,7 +2,7 @@ function (int)->int memoizedFib() { var int[] memo; - memo := array(int, 50); + memo := array(int, 50, 0); function int fib(int n) { if n < 2 then diff --git a/src/pp/s1184725/boppi/test/programs/simpleArray.boppi b/src/pp/s1184725/boppi/test/programs/simpleArray.boppi index 018cfb8..d9bc566 100644 --- a/src/pp/s1184725/boppi/test/programs/simpleArray.boppi +++ b/src/pp/s1184725/boppi/test/programs/simpleArray.boppi @@ -1,4 +1,4 @@ -var int[] arr; arr := array(int, 5); +var int[] arr; arr := array(int, 5, 0); var int i; i := 0; @@ -15,7 +15,7 @@ var char[] arr2; arr2 := ['H', 'a']; print(arr2[0], arr2[1]); -var char[] arr3; arr3 := array(char, 2); +var char[] arr3; arr3 := array(char, 2, 0); arr3[1] := 'a'; arr3[0] := 'H'; diff --git a/src/pp/s1184725/boppi/type/FunctionType.java b/src/pp/s1184725/boppi/type/FunctionType.java index 4804959..2ef7cbf 100644 --- a/src/pp/s1184725/boppi/type/FunctionType.java +++ b/src/pp/s1184725/boppi/type/FunctionType.java @@ 
-3,7 +3,7 @@ package pp.s1184725.boppi.type; import pp.iloc.eval.Machine; /** - * The (->) type. Takes exactly two types as arguments. + * The (->) type. Takes a 'from' type and a 'to' type as arguments. * * @author Frank Wibbelink */ @@ -13,12 +13,12 @@ public class FunctionType implements ReferenceType { /** * Creates a new function type from the given parameter and return types. * - * @param returnType - * the result type when applying an argument to the function * @param parameter * the input type for this function type + * @param returnType + * the result type when applying an argument to the function */ - public FunctionType(Type returnType, Type parameter) { + public FunctionType(Type parameter, Type returnType) { result = returnType; argument = parameter; } diff --git a/util/PygmentBoppiLexer.py b/util/PygmentBoppiLexer.py index 67d478d..dcfed3c 100644 --- a/util/PygmentBoppiLexer.py +++ b/util/PygmentBoppiLexer.py @@ -12,12 +12,12 @@ class BoppiLexer(RegexLexer): 'root': [ (r'/\*.*?\*/', Comment), (r'//.*?$', Comment), - (r'(read|print|if|then|else|fi|while|do|od)\b', Keyword), + (r'(read|print|if|then|else|fi|while|do|od|array)\b', Keyword), (r'(true|false)\b', Keyword.Constant), - (r'(var|function)\b', Keyword.Declaration), + (r'(var|function|const)\b', Keyword.Declaration), (r'(int|bool|char)\b', Keyword.Type), - (r'\(|\)|\{|\}|;|,', Punctuation), - (r'\+|-|!|\*|/|<=|>=|<>|==|<|>|&&|\|\||->|:=', Operator), + (r'\(|\)|\[|\]|\{|\}|;|,', Punctuation), + (r'\+|-|!|\*|/|<=|>=|<>|==|<|>|&&|\|\||->|:=|\.', Operator), (r'[A-Za-z_][A-Za-z0-9_]*', Name.Variable), (r'0|[1-9][0-9]*', Number.Integer), (r'\'.\'', String.Char), diff --git a/util/PygmentILOCLexer.py b/util/PygmentILOCLexer.py new file mode 100644 index 0000000..e17826b --- /dev/null +++ b/util/PygmentILOCLexer.py @@ -0,0 +1,26 @@ +from pygments.lexer import RegexLexer +from pygments.token import * + +__all__ = ['ILOCLexer'] + +class ILOCLexer(RegexLexer): + """Pygments lexer that highlights ILOC assembly listings (alias 'iloc', files '*.iloc').""" + name = 'ILOC' + aliases = ['iloc'] +
filenames = ['*.iloc'] + + tokens = { + 'root': [ + (r'//.*?$', Comment), + (r'(nop|add|sub|mult|div|addI|subI|rsubI|multI|divI|rdivI|lshift|lshiftI|rshift|rshiftI|or|orI|and|andI|xor|xorI|loadI|load|loadAI|loadAO|cload|cloadAI|cloadAO|store|storeAI|storeAO|cstore|cstoreAI|cstoreAO|i2i|c2c|c2i|i2c|cmp_LT|cmp_LE|cmp_EQ|cmp_GE|cmp_GT|cmp_NE|cbr|jumpI|jump|tbl|push|pop|cpush|cpop|in|out|cin|cout|halt|haltI)\b', Keyword), + (r'\[|\]|;|,|=>|->', Punctuation), + (r'<-', Operator), + (r'@[A-Za-z][A-Za-z0-9_]*', Name.Variable.Global), + (r'#[A-Za-z][A-Za-z0-9_]*', Name.Label), + (r'[A-Za-z][A-Za-z0-9_]*', Name.Variable), + (r'-?[0-9]+', Number.Integer), + # the escaped-quote alternative must be tried first; otherwise the backslash is eaten as a plain character and the string token ends at the escaped quote + (r'"(\\"|[^"\n\r])*"', String), + (r'.+?', Text) + ] + }