finalized report

This commit is contained in:
User 2018-02-18 19:15:37 +01:00
parent c8c13e5d96
commit ecd3072e17
24 changed files with 2391 additions and 154 deletions

View File

@ -0,0 +1,765 @@
000 // memlib - simple memory allocator for ILOC
001 //
002 // This library maintains a single-linked list (of monotonically increasing addresses) of free
003 // slots in memory and a reference count for occupied slots. Each slot contains a tuple `(next
004 // free slot address,size)` followed by `size` free bytes. The memory is initialised with a
005 // pointer to the first free slot at `#fst_ptr` and one free slot at `#slot1` of size
006 // `brk-slot1-header`. When a piece of memory is requested for allocation, a tuple
007 // `(reference count, size)` followed by `size` bytes for the object is formed. The calling
008 // convention of the functions in this library consists of pushing the return address followed by
009 // any arguments (in order) to the stack. Any return values will be pushed to the stack likewise.
010 //
011 // Requires a register `brk` for the highest address available for heap allocation.
012 // Requires an instruction `halt` when memory runs out
013 //
014 // @author Frank Wibbelink
015 // initialise: create the free list with a single free slot covering the heap up to brk
016 memlib_: loadI 0 => m_0 // m_0 = 0 (null and base address)
017 loadI @slot1 => m_1 // m_1 = address of the first slot
018 subI brk,@slot1 => m_2 // m_2 = bytes between the first slot and brk
019 subI m_2,@header => m_2 // m_2 = usable size of the first slot (its header excluded)
020 storeAI m_1 => m_0,@fst_ptr // pointer to first slot
021 storeAI m_0 => m_1,@off_next // next slot is null
022 storeAI m_2 => m_1,@off_size // first slot size
023 jumpI -> ememlib_ // skip over the library routines
024 // allocate memory
025 // takes the first free slot whose size equals the request exactly or is at least the request
026 // plus one header (the remainder becomes a new free slot); pushes null (0) if no slot fits
027 // stack: [return address, object size] -> [object address]
028 memalloc: loadI 0 => m_0 // m_0 = null
029 pop => m_s // load size
030 loadI @fst_ptr => m_p // load previous address (base pointer)
031 loadAI m_p,@off_next => m_c // load current address (first slot)
032 ma_loop: cmp_EQ m_0,m_c => m_1 // check if address pointer is null
033 cbr m_1 -> ma_null,ma_cont // if null, go to end and return null pointer
034 ma_cont: loadAI m_c,@off_size => m_1 // load slot size
035 cmp_EQ m_1,m_s => m_2 // check if request fits exactly
036 cbr m_2 -> ma_yxct,ma_nxct // exact fit takes the whole slot, otherwise try to split it
037 ma_nxct: subI m_1,@header => m_1 // reserve room for the header of the remaining free slot
038 cmp_GE m_1,m_s => m_1 // check if request fits
039 cbr m_1 -> ma_found,ma_next
040 ma_next: i2i m_c => m_p // advance: previous = current
041 loadAI m_p,@off_next => m_c // current = next free slot
042 jumpI -> ma_loop
043 ma_yxct: loadAI m_c,@off_next => m_n // location of next free slot
044 jumpI -> ma_final
045 ma_found: addI m_s,@header => m_1 // bytes consumed by the object (header + data)
046 add m_1,m_c => m_n // location of new free slot
047 loadAI m_c,@off_size => m_1
048 subI m_1,@header => m_1
049 sub m_1,m_s => m_1 // size of new free slot
050 storeAI m_1 => m_n,@off_size
051 loadAI m_c,@off_next => m_1 // location of next free slot
052 storeAI m_1 => m_n,@off_next
053 ma_final: storeAI m_n => m_p,@off_next // link previous free slot to new
054 addI m_c,@header => m_c // move to object location
055 loadI 1 => m_1
056 storeAI m_1 => m_c,@off_oref // set reference count to 1
057 storeAI m_s => m_c,@off_osize // set object size
058 ma_null: pop => m_1 // load return address
059 push m_c // store object address
060 jump -> m_1
061 // increase reference count of object; halts if the object address is null
062 // stack: [return address, object address] -> []
063 memaddref: loadI 0 => m_0 // m_0 = null
064 pop => m_n // load object address
065 cmp_EQ m_0,m_n => m_1
066 cbr m_1 -> mr_ynul,mr_nnul // check if null pointer
067 mr_ynul: haltI 1865445997 // halt program; null pointer passed
068 mr_nnul: loadAI m_n,@off_oref => m_1 // load reference count
069 addI m_1,1 => m_1
070 storeAI m_1 => m_n,@off_oref // store incremented count
071 pop => m_1 // load return address
072 jump -> m_1
073 // decrease reference count of object; halts if the object address is null
074 // frees memory if count goes to zero, merging the slot with adjacent free slots
075 // stack: [return address, object address] -> []
076 memfree: loadI 0 => m_0 // m_0 = null
077 pop => m_n // load object address
078 cmp_EQ m_0,m_n => m_1
079 cbr m_1 -> mf_ynul,mf_nnul // check if null pointer
080 mf_ynul: haltI 1865442925 // halt program; null pointer passed
081 mf_nnul: loadAI m_n,@off_oref => m_1 // load reference count
082 subI m_1,1 => m_1
083 cmp_GT m_1,m_0 => m_2 // still referenced after the decrement?
084 cbr m_2 -> mf_exit,mf_free
085 mf_exit: storeAI m_1 => m_n,@off_oref // store decremented count
086 pop => m_1 // load return address
087 jump -> m_1
088 mf_free: subI m_n,@header => m_n // m_n = start of the slot (its header)
089 loadI @fst_ptr => m_p // previous = base pointer
090 loadAI m_p,@off_next => m_c // current = first free slot
091 mf_loop: cmp_EQ m_0,m_c => m_1 // loop until the surrounding free blocks are found
092 cbr m_1 -> mf_done,mf_cont // end of list: slot lies past the last free slot (an exact-fit allocation can consume it), so insert after m_p
093 mf_halt: haltI 1882220141 // no longer reached; line kept so that instruction addresses do not shift
094 mf_cont: cmp_EQ m_c,m_n => m_1
095 cbr m_1 -> mf_hal2,mf_con2
096 mf_hal2: haltI 1717855853 // halt program; object is free slot
097 mf_con2: cmp_LE m_c,m_n => m_1 // current still below the slot? then keep walking
098 cbr m_1 -> mf_next,mf_done
099 mf_next: i2i m_c => m_p
100 loadAI m_p,@off_next => m_c
101 jumpI -> mf_loop
102 mf_done: loadAI m_p,@off_size => m_1 // NOTE(review): when m_p is still the base pointer this reads the word after it, not a slot size - confirm it can never equal m_n
103 addI m_1,@header => m_1
104 add m_1,m_p => m_2 // address just past the previous free slot
105 cmp_EQ m_2,m_n => m_2 // is the freed slot directly behind it?
106 cbr m_2 -> mf_yprv,mf_nprv
107 mf_yprv: loadAI m_n,@off_size => m_2 // merge with previous free slot
108 add m_1,m_2 => m_1 // new size of previous free slot
109 storeAI m_1 => m_p,@off_size
110 i2i m_p => m_n // the merged slot is now the slot being inserted
111 jumpI -> mf_dprv
112 mf_nprv: storeAI m_n => m_p,@off_next // link previous free slot with new
113 mf_dprv: loadAI m_n,@off_size => m_1
114 addI m_1,@header => m_1
115 add m_1,m_n => m_2 // address just past the freed slot
116 cmp_EQ m_2,m_c => m_2 // is the next free slot directly behind it? (never true when m_c is null)
117 cbr m_2 -> mf_ynxt,mf_nnxt
118 mf_ynxt: loadAI m_c,@off_size => m_2 // merge with next free slot
119 add m_1,m_2 => m_1 // new size of next free slot
120 storeAI m_1 => m_n,@off_size
121 loadAI m_c,@off_next => m_1
122 storeAI m_1 => m_n,@off_next // move link of next's next to new free slot
123 pop => m_1 // load return address
124 jump -> m_1
125 mf_nnxt: storeAI m_c => m_n,@off_next // link new free slot with next
126 pop => m_1 // load return address
127 jump -> m_1
128 // copy object to location (not implemented: halts as soon as it is called)
129 // stack: [return address, object address, destination] -> []
130 memcopy: loadI 0 => m_0 // m_0 = null
131 haltI 1835626101 // unimplemented
132 pop => m_1 // load return address (presumably never reached, since haltI stops the program)
133 jump -> m_1
134 ememlib_: nop // landing point of the jump in memlib_ that skips the routines above
135 // end of memlib
136 // stdlib - generic subroutines for ILOC
137 //
138 // This library contains a few common subroutines for Boppi.
139 //
140 // @author Frank Wibbelink
141 // initialise: nothing to set up, only skip over the routines
142 stdlib_: jumpI -> estdlib_ // continue after the library
143 // write a boolean to output as the text "true" or "false"
144 // stack: [return address, bool] -> []
145 stdbout: pop => m_1 // get boolean
146 loadI 0 => m_2 // load zero-length string
147 push m_2 // presumably consumed by cout as the string to append - verify against the simulator
148 cbr m_1 -> sbout_t,sbout_f // select the text to print
149 sbout_t: cout "true"
150 jumpI -> sbout_e
151 sbout_f: cout "false"
152 sbout_e: pop => m_1 // load return address
153 jump -> m_1
154 // read a character from input: keeps the first character popped, discards the rest of the line
155 // stack: [return address] -> [char]
156 stdcin: cin "" // get line
157 pop => m_1 // get length
158 cbr m_1 -> scin_t,stdcin // repeat until at least one character
159 scin_t: cpop => m_2 // save character
160 scin_lc: subI m_1,1 => m_1 // decrement char count
161 cbr m_1 -> scin_ll,scin_le // characters left? discard one, otherwise finish
162 scin_ll: cpop => m_0 // discard character
163 jumpI -> scin_lc // repeat
164 scin_le: loadI 0 => m_0 // reset zero register
165 pop => m_1 // get return address
166 cpush m_2 // push result character
167 jump -> m_1
168 estdlib_: nop // landing point of the jump in stdlib_
169 // end of stdlib
170 loadI 0 => r_nul // initialise zero register
171 loadI 176 => r_arp // malloc - return address (the pop below)
172 push r_arp // malloc
173 loadI 28 => r_arp // malloc - request 28 bytes for the main AR
174 push r_arp // malloc
175 jumpI -> memalloc // malloc
176 pop => r_arp // malloc - address of the allocated object
177 addI r_arp,16 => r_arp // construct main AR - ARP points 16 bytes into the object
178 jumpI -> s0 // define memoizedFib - jump over body
179 nop // define memoizedFib - entry point
180 loadI 50 => __1 // 50
181 multI __1,4 => __1 // produce array size
182 cmp_GE __1,r_nul => __2 // check size non negative
183 cbr __2 -> aszt1,aszf2 // halt on a negative array size
184 aszf2: haltI 1634628474 // invalid array size
185 aszt1: nop // valid array size
186 loadI 190 => __2 // malloc - return address
187 push __2 // malloc
188 push __1 // malloc - array size in bytes
189 jumpI -> memalloc // malloc
190 pop => __2 // malloc - address of the new array
191 addI r_arp,0 => __1 // add offset
192 load __1 => __3 // load reference - NOTE(review): slot of a freshly allocated AR; memalloc does not clear memory, confirm it is null here
193 cmp_EQ __3,r_nul => __4 // remove old reference
194 cbr __4 -> ynul3,nnul4 // remove old reference
195 nnul4: nop // remove old reference
196 loadI 200 => __4 // free
197 push __4 // free
198 push __3 // free
199 jumpI -> memfree // free
200 ynul3: nop // remove old reference
201 store __2 => __1 // to memo
202 load __1 => __3 // load reference
203 loadI 207 => __5 // memaddref
204 push __5 // memaddref
205 push __3 // memaddref
206 jumpI -> memaddref // memaddref
207 cmp_EQ __2,r_nul => __3 // remove old reference
208 cbr __3 -> ynul5,nnul6 // remove old reference
209 nnul6: nop // remove old reference
210 loadI 214 => __3 // free
211 push __3 // free
212 push __2 // free
213 jumpI -> memfree // free
214 ynul5: nop // remove old reference
215 jumpI -> s7 // define fib - jump over body
216 nop // define fib - entry point
217 addI r_arp,0 => __1 // add offset
218 load __1 => __1 // load address
219 loadI 1 => __3 // 1
220 cmp_LT __1,__3 => __1 // <
221 addI r_arp,0 => __2 // add offset
222 load __2 => __2 // load address
223 loadI 46 => __5 // 46
224 cmp_GT __2,__5 => __2 // >
225 or __1,__2 => __1 // ||
226 cbr __1 -> if_t8,if_f9 // parameter below 1 or above 46: result is 0
227 if_t8: nop // then branch
228 loadI 0 => __2 // 0
229 i2i __2 => __5 // result
230 jumpI -> if_e10 // skip else branch
231 if_f9: nop // else branch
232 addI r_arp,0 => __1 // add offset
233 load __1 => __1 // load address
234 loadI 2 => __3 // 2
235 cmp_LT __1,__3 => __1 // <
236 cbr __1 -> if_t11,if_f12 // parameter below 2: result is 1
237 if_t11: nop // then branch
238 loadI 1 => __2 // 1
239 i2i __2 => __3 // result
240 jumpI -> if_e13 // skip else branch
241 if_f12: nop // else branch
242 i2i r_arp => ART // travelling ALs
243 loadAI ART,-16 => ART // follow the access link to the enclosing AR
244 addI ART,0 => __1 // add offset
245 load __1 => __1 // get array object
246 addI r_arp,0 => __2 // add offset
247 load __2 => __2 // load address
248 loadAI __1,-4 => __6 // check array index - load the object size in bytes
249 divI __6,4 => __6 // check array index - number of elements
250 cmp_LT __2,__6 => __6 // check array index
251 cmp_GE __2,r_nul => __4 // check array index
252 and __6,__4 => __4 // check array index
253 cbr __4 -> nob18,oob17 // check array index
254 oob17: haltI 1634692962 // array index out of bounds
255 nob18: multI __2,4 => __2 // multiply index by size
256 add __1,__2 => __1 // get array index address
257 load __1 => __1 // load address - NOTE(review): the array is never initialised in the visible code; confirm unused entries read as 0
258 loadI 0 => __4 // 0
259 cmp_GT __1,__4 => __1 // >
260 cbr __1 -> if_t14,if_f15 // positive entry: reuse it, otherwise compute
261 if_t14: nop // then branch
262 i2i r_arp => ART // travelling ALs
263 loadAI ART,-16 => ART // follow the access link to the enclosing AR
264 addI ART,0 => __2 // add offset
265 load __2 => __2 // get array object
266 addI r_arp,0 => __1 // add offset
267 load __1 => __1 // load address
268 loadAI __2,-4 => __7 // check array index
269 divI __7,4 => __7 // check array index
270 cmp_LT __1,__7 => __7 // check array index
271 cmp_GE __1,r_nul => __6 // check array index
272 and __7,__6 => __6 // check array index
273 cbr __6 -> nob20,oob19 // check array index
274 oob19: haltI 1634692962 // array index out of bounds
275 nob20: multI __1,4 => __1 // multiply index by size
276 add __2,__1 => __2 // get array index address
277 load __2 => __2 // load address
278 i2i __2 => __4 // result
279 jumpI -> if_e16 // skip else branch
280 if_f15: nop // else branch
281 i2i r_arp => ART // travelling ALs
282 loadAI ART,-16 => ART // follow the access link to the enclosing AR
283 addI ART,4 => __2 // add offset
284 load __2 => __6 // call fib - load function reference
285 loadAI __6,8 => __6 // call fib - load AR size
286 loadI 290 => __1 // malloc - return address
287 push __1 // malloc
288 push __6 // malloc
289 jumpI -> memalloc // malloc
290 pop => __1 // malloc - address of the callee AR object
291 addI __1,16 => __1 // call fib - shift AR
292 addI r_arp,0 => __2 // add offset
293 load __2 => __2 // load address
294 loadI 1 => __6 // 1
295 sub __2,__6 => __2 // -
296 storeAI __2 => __1,0 // call fib - store param 0
297 push __5 // call fib - register save __5
298 push __3 // call fib - register save __3
299 push __4 // call fib - register save __4
300 i2i r_arp => ART // travelling ALs
301 loadAI ART,-16 => ART // follow the access link to the enclosing AR
302 addI ART,4 => __6 // add offset
303 load __6 => __7 // call fib - load function reference
304 storeAI r_arp => __1,-4 // call fib - link caller ARP
305 loadAI __7,4 => __2 // call fib - load AL
306 storeAI __2 => __1,-16 // call fib - link AL
307 loadAI __1,-16 => ART // add ref for callee's AL
308 i2i ART => ART // AR incRef
309 cmp_NE ART,r_nul => __2 // AR incRef
310 cbr __2 -> aril21,arid22 // AR incRef
311 aril21: loadI 316 => __2 // AR incRef - loop: add a reference to every AR on the access-link chain
312 push __2 // AR incRef
313 subI ART,16 => __2 // AR incRef - object address of the AR
314 push __2 // AR incRef
315 jumpI -> memaddref // AR incRef
316 loadAI ART,-16 => ART // AR incRef
317 cmp_NE ART,r_nul => __2 // AR incRef
318 cbr __2 -> aril21,arid22 // AR incRef
319 arid22: nop // AR incRef
320 loadI 325 => __2 // call fib - load return address
321 storeAI __2 => __1,-8 // call fib - set return address
322 i2i __1 => r_arp // call fib - move ARP
323 loadAI __7,0 => __2 // call fib - load target address
324 jump -> __2 // call fib - execute
325 i2i r_arp => ART // AR decRef
326 cmp_NE ART,r_nul => __7 // AR decRef
327 cbr __7 -> ardl23,ardd24 // AR decRef
328 ardl23: loadI 333 => __7 // AR decRef - loop: drop a reference from the callee AR and every AR on its access-link chain
329 push __7 // AR decRef
330 subI ART,16 => __7 // AR decRef - object address of the AR
331 push __7 // AR decRef
332 jumpI -> memfree // AR decRef
333 loadAI ART,-16 => ART // AR decRef
334 cmp_NE ART,r_nul => __7 // AR decRef
335 cbr __7 -> ardl23,ardd24 // AR decRef
336 ardd24: nop // AR decRef
337 pop => __4 // call fib - register unsave __4
338 pop => __3 // call fib - register unsave __3
339 pop => __5 // call fib - register unsave __5
340 loadAI r_arp,-12 => __1 // call fib - load result
341 loadAI r_arp,-4 => r_arp // call fib - reset ARP
342 i2i r_arp => ART // travelling ALs
343 loadAI ART,-16 => ART // follow the access link to the enclosing AR
344 addI ART,4 => __6 // add offset
345 load __6 => __7 // call fib - load function reference
346 loadAI __7,8 => __7 // call fib - load AR size
347 loadI 351 => __2 // malloc - return address
348 push __2 // malloc
349 push __7 // malloc
350 jumpI -> memalloc // malloc
351 pop => __2 // malloc - address of the callee AR object
352 addI __2,16 => __2 // call fib - shift AR
353 addI r_arp,0 => __7 // add offset
354 load __7 => __7 // load address
355 loadI 2 => __6 // 2
356 sub __7,__6 => __7 // -
357 storeAI __7 => __2,0 // call fib - store param 0
358 push __5 // call fib - register save __5
359 push __3 // call fib - register save __3
360 push __4 // call fib - register save __4
361 push __1 // call fib - register save __1 (result of the first call)
362 i2i r_arp => ART // travelling ALs
363 loadAI ART,-16 => ART // follow the access link to the enclosing AR
364 addI ART,4 => __8 // add offset
365 load __8 => __7 // call fib - load function reference
366 storeAI r_arp => __2,-4 // call fib - link caller ARP
367 loadAI __7,4 => __6 // call fib - load AL
368 storeAI __6 => __2,-16 // call fib - link AL
369 loadAI __2,-16 => ART // add ref for callee's AL
370 i2i ART => ART // AR incRef
371 cmp_NE ART,r_nul => __6 // AR incRef
372 cbr __6 -> aril25,arid26 // AR incRef
373 aril25: loadI 378 => __6 // AR incRef - loop: add a reference to every AR on the access-link chain
374 push __6 // AR incRef
375 subI ART,16 => __6 // AR incRef - object address of the AR
376 push __6 // AR incRef
377 jumpI -> memaddref // AR incRef
378 loadAI ART,-16 => ART // AR incRef
379 cmp_NE ART,r_nul => __6 // AR incRef
380 cbr __6 -> aril25,arid26 // AR incRef
381 arid26: nop // AR incRef
382 loadI 387 => __6 // call fib - load return address
383 storeAI __6 => __2,-8 // call fib - set return address
384 i2i __2 => r_arp // call fib - move ARP
385 loadAI __7,0 => __6 // call fib - load target address
386 jump -> __6 // call fib - execute
387 i2i r_arp => ART // AR decRef
388 cmp_NE ART,r_nul => __7 // AR decRef
389 cbr __7 -> ardl27,ardd28 // AR decRef
390 ardl27: loadI 395 => __7 // AR decRef - loop: drop a reference from the callee AR and every AR on its access-link chain
391 push __7 // AR decRef
392 subI ART,16 => __7 // AR decRef - object address of the AR
393 push __7 // AR decRef
394 jumpI -> memfree // AR decRef
395 loadAI ART,-16 => ART // AR decRef
396 cmp_NE ART,r_nul => __7 // AR decRef
397 cbr __7 -> ardl27,ardd28 // AR decRef
398 ardd28: nop // AR decRef
399 pop => __1 // call fib - register unsave __1
400 pop => __4 // call fib - register unsave __4
401 pop => __3 // call fib - register unsave __3
402 pop => __5 // call fib - register unsave __5
403 loadAI r_arp,-12 => __2 // call fib - load result
404 loadAI r_arp,-4 => r_arp // call fib - reset ARP
405 add __1,__2 => __1 // +
406 i2i r_arp => ART // travelling ALs
407 loadAI ART,-16 => ART // follow the access link to the enclosing AR
408 addI ART,0 => __7 // add offset
409 load __7 => __7 // get array object
410 addI r_arp,0 => __6 // add offset
411 load __6 => __6 // load address
412 loadAI __7,-4 => __8 // check array index
413 divI __8,4 => __8 // check array index
414 cmp_LT __6,__8 => __8 // check array index
415 cmp_GE __6,r_nul => __2 // check array index
416 and __8,__2 => __2 // check array index
417 cbr __2 -> nob30,oob29 // check array index
418 oob29: haltI 1634692962 // array index out of bounds
419 nob30: multI __6,4 => __6 // multiply index by size
420 add __7,__6 => __7 // get array index address
421 store __1 => __7 // to memo[n]
422 i2i __1 => __4 // result
423 if_e16: nop // end target
424 i2i __4 => __3 // result
425 if_e13: nop // end target
426 i2i __3 => __5 // result
427 if_e10: nop // end target
428 storeAI __5 => r_arp,-12 // define fib - move result
429 loadAI r_arp,-8 => __1 // load ref count - NOTE(review): offset -8 is the slot the return address is loaded from below; confirm this is the intended field
430 loadI 1 => __4 // one
431 cmp_LE __1,__4 => __1 // check more than one ref
432 cbr __1 -> ycl31,ncl32 // remove vars if last reference
433 ycl31: nop // cleanup target
434 ncl32: nop // no cleanup target
435 loadAI r_arp,-8 => __4 // define fib - load return address
436 jump -> __4 // define fib - go to return address
437 s7: nop // define fib - skip target
438 loadI 443 => __3 // malloc - return address
439 push __3 // malloc
440 loadI 12 => __3 // malloc - 12 bytes: target address, ARP, AR size
441 push __3 // malloc
442 jumpI -> memalloc // malloc
443 pop => __3 // malloc - address of the function reference
444 loadI 216 => __1 // define fib - load target address
445 storeAI __1 => __3,0 // define fib - set target address
446 storeAI r_arp => __3,4 // define fib - copy ARP
447 loadI 20 => __1 // define fib - load AR size
448 storeAI __1 => __3,8 // define fib - set AR size
449 storeAI __3 => r_arp,4 // define fib - set function reference
450 i2i r_arp => ART // AR incRef
451 cmp_NE ART,r_nul => __3 // AR incRef
452 cbr __3 -> aril33,arid34 // AR incRef
453 aril33: loadI 458 => __3 // AR incRef - loop: add a reference to every AR on the access-link chain
454 push __3 // AR incRef
455 subI ART,16 => __3 // AR incRef
456 push __3 // AR incRef
457 jumpI -> memaddref // AR incRef
458 loadAI ART,-16 => ART // AR incRef
459 cmp_NE ART,r_nul => __3 // AR incRef
460 cbr __3 -> aril33,arid34 // AR incRef
461 arid34: nop // AR incRef
462 addI r_arp,4 => __4 // add offset
463 load __4 => __4 // load address
464 loadI 468 => __7 // memaddref
465 push __7 // memaddref
466 push __4 // memaddref
467 jumpI -> memaddref // memaddref
468 loadAI __4,4 => __5 // add new reference
469 i2i __5 => ART // AR incRef
470 cmp_NE ART,r_nul => __7 // AR incRef
471 cbr __7 -> aril35,arid36 // AR incRef
472 aril35: loadI 477 => __7 // AR incRef
473 push __7 // AR incRef
474 subI ART,16 => __7 // AR incRef
475 push __7 // AR incRef
476 jumpI -> memaddref // AR incRef
477 loadAI ART,-16 => ART // AR incRef
478 cmp_NE ART,r_nul => __7 // AR incRef
479 cbr __7 -> aril35,arid36 // AR incRef
480 arid36: nop // AR incRef
481 storeAI __4 => r_arp,-12 // define memoizedFib - move result
482 loadAI r_arp,-8 => __3 // load ref count - NOTE(review): offset -8 is the slot the return address is loaded from below; confirm this is the intended field
483 loadI 1 => __7 // one
484 cmp_LE __3,__7 => __3 // check more than one ref
485 cbr __3 -> ycl37,ncl38 // remove vars if last reference
486 ycl37: nop // cleanup target
487 loadAI r_arp,0 => __5 // remove reference get var
488 cmp_EQ __5,r_nul => __4 // remove reference
489 cbr __4 -> ynul39,nnul40 // remove reference
490 nnul40: nop // remove reference
491 loadI 495 => __4 // free
492 push __4 // free
493 push __5 // free
494 jumpI -> memfree // free
495 ynul39: nop // remove reference
496 loadAI r_arp,4 => __5 // remove reference get var
497 cmp_EQ __5,r_nul => __4 // remove reference
498 cbr __4 -> ynul41,nnul42 // remove reference
499 nnul42: nop // remove reference
500 loadI 504 => __4 // free
501 push __4 // free
502 push __5 // free
503 jumpI -> memfree // free
504 loadAI __5,4 => __5 // remove reference
505 i2i __5 => ART // AR decRef
506 cmp_NE ART,r_nul => __4 // AR decRef
507 cbr __4 -> ardl43,ardd44 // AR decRef
508 ardl43: loadI 513 => __4 // AR decRef
509 push __4 // AR decRef
510 subI ART,16 => __4 // AR decRef
511 push __4 // AR decRef
512 jumpI -> memfree // AR decRef
513 loadAI ART,-16 => ART // AR decRef
514 cmp_NE ART,r_nul => __4 // AR decRef
515 cbr __4 -> ardl43,ardd44 // AR decRef
516 ardd44: nop // AR decRef
517 ynul41: nop // remove reference
518 ncl38: nop // no cleanup target
519 loadAI r_arp,-8 => __7 // define memoizedFib - load return address
520 jump -> __7 // define memoizedFib - go to return address
521 s0: nop // define memoizedFib - skip target
522 loadI 527 => __5 // malloc - return address
523 push __5 // malloc
524 loadI 12 => __5 // malloc - 12 bytes: target address, ARP, AR size
525 push __5 // malloc
526 jumpI -> memalloc // malloc
527 pop => __5 // malloc - address of the function reference
528 loadI 179 => __3 // define memoizedFib - load target address
529 storeAI __3 => __5,0 // define memoizedFib - set target address
530 storeAI r_arp => __5,4 // define memoizedFib - copy ARP
531 loadI 24 => __3 // define memoizedFib - load AR size
532 storeAI __3 => __5,8 // define memoizedFib - set AR size
533 storeAI __5 => r_arp,0 // define memoizedFib - set function reference
534 i2i r_arp => ART // AR incRef
535 cmp_NE ART,r_nul => __5 // AR incRef
536 cbr __5 -> aril45,arid46 // AR incRef
537 aril45: loadI 542 => __5 // AR incRef - loop: add a reference to every AR on the access-link chain
538 push __5 // AR incRef
539 subI ART,16 => __5 // AR incRef
540 push __5 // AR incRef
541 jumpI -> memaddref // AR incRef
542 loadAI ART,-16 => ART // AR incRef
543 cmp_NE ART,r_nul => __5 // AR incRef
544 cbr __5 -> aril45,arid46 // AR incRef
545 arid46: nop // AR incRef
546 addI r_arp,0 => __1 // add offset
547 load __1 => __4 // call memoizedFib - load function reference
548 loadAI __4,8 => __4 // call memoizedFib - load AR size
549 loadI 553 => __7 // malloc - return address
550 push __7 // malloc
551 push __4 // malloc
552 jumpI -> memalloc // malloc
553 pop => __7 // malloc - address of the callee AR object
554 addI __7,16 => __7 // call memoizedFib - shift AR
555 addI r_arp,0 => __6 // add offset
556 load __6 => __5 // call memoizedFib - load function reference
557 storeAI r_arp => __7,-4 // call memoizedFib - link caller ARP
558 loadAI __5,4 => __3 // call memoizedFib - load AL
559 storeAI __3 => __7,-16 // call memoizedFib - link AL
560 loadAI __7,-16 => ART // add ref for callee's AL
561 i2i ART => ART // AR incRef
562 cmp_NE ART,r_nul => __3 // AR incRef
563 cbr __3 -> aril47,arid48 // AR incRef
564 aril47: loadI 569 => __3 // AR incRef - loop: add a reference to every AR on the access-link chain
565 push __3 // AR incRef
566 subI ART,16 => __3 // AR incRef
567 push __3 // AR incRef
568 jumpI -> memaddref // AR incRef
569 loadAI ART,-16 => ART // AR incRef
570 cmp_NE ART,r_nul => __3 // AR incRef
571 cbr __3 -> aril47,arid48 // AR incRef
572 arid48: nop // AR incRef
573 loadI 578 => __3 // call memoizedFib - load return address
574 storeAI __3 => __7,-8 // call memoizedFib - set return address
575 i2i __7 => r_arp // call memoizedFib - move ARP
576 loadAI __5,0 => __3 // call memoizedFib - load target address
577 jump -> __3 // call memoizedFib - execute
578 i2i r_arp => ART // AR decRef
579 cmp_NE ART,r_nul => __3 // AR decRef
580 cbr __3 -> ardl49,ardd50 // AR decRef
581 ardl49: loadI 586 => __3 // AR decRef - loop: drop a reference from the callee AR and every AR on its access-link chain
582 push __3 // AR decRef
583 subI ART,16 => __3 // AR decRef
584 push __3 // AR decRef
585 jumpI -> memfree // AR decRef
586 loadAI ART,-16 => ART // AR decRef
587 cmp_NE ART,r_nul => __3 // AR decRef
588 cbr __3 -> ardl49,ardd50 // AR decRef
589 ardd50: nop // AR decRef
590 loadAI r_arp,-12 => __5 // call memoizedFib - load result
591 loadAI r_arp,-4 => r_arp // call memoizedFib - reset ARP
592 addI r_arp,4 => __6 // add offset
593 load __6 => __4 // load reference
594 cmp_EQ __4,r_nul => __1 // remove old reference
595 cbr __1 -> ynul51,nnul52 // remove old reference
596 nnul52: nop // remove old reference
597 loadI 601 => __1 // free
598 push __1 // free
599 push __4 // free
600 jumpI -> memfree // free
601 loadAI __4,4 => __4 // remove old reference - ARP stored in the function reference
602 i2i __4 => ART // AR decRef
603 cmp_NE ART,r_nul => __1 // AR decRef
604 cbr __1 -> ardl53,ardd54 // AR decRef
605 ardl53: loadI 610 => __1 // AR decRef
606 push __1 // AR decRef
607 subI ART,16 => __1 // AR decRef
608 push __1 // AR decRef
609 jumpI -> memfree // AR decRef
610 loadAI ART,-16 => ART // AR decRef
611 cmp_NE ART,r_nul => __1 // AR decRef
612 cbr __1 -> ardl53,ardd54 // AR decRef
613 ardd54: nop // AR decRef
614 ynul51: nop // remove old reference
615 store __5 => __6 // to myFib
616 load __6 => __3 // load reference
617 loadI 621 => __1 // memaddref
618 push __1 // memaddref
619 push __3 // memaddref
620 jumpI -> memaddref // memaddref
621 loadAI __3,4 => __7 // add new reference - ARP stored in the function reference
622 i2i __7 => ART // AR incRef
623 cmp_NE ART,r_nul => __1 // AR incRef
624 cbr __1 -> aril55,arid56 // AR incRef
625 aril55: loadI 630 => __1 // AR incRef
626 push __1 // AR incRef
627 subI ART,16 => __1 // AR incRef
628 push __1 // AR incRef
629 jumpI -> memaddref // AR incRef
630 loadAI ART,-16 => ART // AR incRef
631 cmp_NE ART,r_nul => __1 // AR incRef
632 cbr __1 -> aril55,arid56 // AR incRef
633 arid56: nop // AR incRef
634 cmp_EQ __5,r_nul => __7 // remove old reference - release the temporary holding the call result
635 cbr __7 -> ynul57,nnul58 // remove old reference
636 nnul58: nop // remove old reference
637 loadI 641 => __7 // free
638 push __7 // free
639 push __5 // free
640 jumpI -> memfree // free
641 loadAI __5,4 => __5 // remove old reference
642 i2i __5 => ART // AR decRef
643 cmp_NE ART,r_nul => __7 // AR decRef
644 cbr __7 -> ardl59,ardd60 // AR decRef
645 ardl59: loadI 650 => __7 // AR decRef
646 push __7 // AR decRef
647 subI ART,16 => __7 // AR decRef
648 push __7 // AR decRef
649 jumpI -> memfree // AR decRef
650 loadAI ART,-16 => ART // AR decRef
651 cmp_NE ART,r_nul => __7 // AR decRef
652 cbr __7 -> ardl59,ardd60 // AR decRef
653 ardd60: nop // AR decRef
654 ynul57: nop // remove old reference
655 jumpI -> while_f62 // to condition
656 while_t61: nop // loop target
657 addI r_arp,4 => __1 // add offset
658 load __1 => __3 // call myFib - load function reference
659 loadAI __3,8 => __3 // call myFib - load AR size
660 loadI 664 => __6 // malloc - return address
661 push __6 // malloc
662 push __3 // malloc
663 jumpI -> memalloc // malloc
664 pop => __6 // malloc - address of the callee AR object
665 addI __6,16 => __6 // call myFib - shift AR
666 addI r_arp,8 => __5 // add offset
667 load __5 => __5 // load address
668 storeAI __5 => __6,0 // call myFib - store param 0
669 addI r_arp,4 => __4 // add offset
670 load __4 => __1 // call myFib - load function reference
671 storeAI r_arp => __6,-4 // call myFib - link caller ARP
672 loadAI __1,4 => __7 // call myFib - load AL
673 storeAI __7 => __6,-16 // call myFib - link AL
674 loadAI __6,-16 => ART // add ref for callee's AL
675 i2i ART => ART // AR incRef
676 cmp_NE ART,r_nul => __7 // AR incRef
677 cbr __7 -> aril64,arid65 // AR incRef
678 aril64: loadI 683 => __7 // AR incRef - loop: add a reference to every AR on the access-link chain
679 push __7 // AR incRef
680 subI ART,16 => __7 // AR incRef
681 push __7 // AR incRef
682 jumpI -> memaddref // AR incRef
683 loadAI ART,-16 => ART // AR incRef
684 cmp_NE ART,r_nul => __7 // AR incRef
685 cbr __7 -> aril64,arid65 // AR incRef
686 arid65: nop // AR incRef
687 loadI 692 => __7 // call myFib - load return address
688 storeAI __7 => __6,-8 // call myFib - set return address
689 i2i __6 => r_arp // call myFib - move ARP
690 loadAI __1,0 => __7 // call myFib - load target address
691 jump -> __7 // call myFib - execute
692 i2i r_arp => ART // AR decRef
693 cmp_NE ART,r_nul => __7 // AR decRef
694 cbr __7 -> ardl66,ardd67 // AR decRef
695 ardl66: loadI 700 => __7 // AR decRef - loop: drop a reference from the callee AR and every AR on its access-link chain
696 push __7 // AR decRef
697 subI ART,16 => __7 // AR decRef
698 push __7 // AR decRef
699 jumpI -> memfree // AR decRef
700 loadAI ART,-16 => ART // AR decRef
701 cmp_NE ART,r_nul => __7 // AR decRef
702 cbr __7 -> ardl66,ardd67 // AR decRef
703 ardd67: nop // AR decRef
704 loadAI r_arp,-12 => __1 // call myFib - load result
705 loadAI r_arp,-4 => r_arp // call myFib - reset ARP
706 out "",__1 // print the result
707 while_f62: nop // condition target
708 addI r_arp,8 => __6 // add offset (value not used below)
709 in "" => __7 // read a number
710 addI r_arp,8 => __4 // add offset
711 store __7 => __4 // save to var n
712 loadI 0 => __1 // 0
713 cmp_GT __7,__1 => __7 // >
714 cbr __7 -> while_t61,while_e63 // loop while the number read is positive
715 while_e63: nop // end target
716 loadAI r_arp,0 => __6 // remove reference get var
717 cmp_EQ __6,r_nul => __4 // remove reference
718 cbr __4 -> ynul68,nnul69 // remove reference
719 nnul69: nop // remove reference
720 loadI 724 => __4 // free
721 push __4 // free
722 push __6 // free
723 jumpI -> memfree // free
724 loadAI __6,4 => __6 // remove reference - ARP stored in the function reference
725 i2i __6 => ART // AR decRef
726 cmp_NE ART,r_nul => __4 // AR decRef
727 cbr __4 -> ardl70,ardd71 // AR decRef
728 ardl70: loadI 733 => __4 // AR decRef
729 push __4 // AR decRef
730 subI ART,16 => __4 // AR decRef
731 push __4 // AR decRef
732 jumpI -> memfree // AR decRef
733 loadAI ART,-16 => ART // AR decRef
734 cmp_NE ART,r_nul => __4 // AR decRef
735 cbr __4 -> ardl70,ardd71 // AR decRef
736 ardd71: nop // AR decRef
737 ynul68: nop // remove reference
738 loadAI r_arp,4 => __6 // remove reference get var
739 cmp_EQ __6,r_nul => __4 // remove reference
740 cbr __4 -> ynul72,nnul73 // remove reference
741 nnul73: nop // remove reference
742 loadI 746 => __4 // free
743 push __4 // free
744 push __6 // free
745 jumpI -> memfree // free
746 loadAI __6,4 => __6 // remove reference - ARP stored in the function reference
747 i2i __6 => ART // AR decRef
748 cmp_NE ART,r_nul => __4 // AR decRef
749 cbr __4 -> ardl74,ardd75 // AR decRef
750 ardl74: loadI 755 => __4 // AR decRef
751 push __4 // AR decRef
752 subI ART,16 => __4 // AR decRef
753 push __4 // AR decRef
754 jumpI -> memfree // AR decRef
755 loadAI ART,-16 => ART // AR decRef
756 cmp_NE ART,r_nul => __4 // AR decRef
757 cbr __4 -> ardl74,ardd75 // AR decRef
758 ardd75: nop // AR decRef
759 ynul72: nop // remove reference
760 subI r_arp,16 => r_arp // deconstruct main AR
761 loadI 765 => __7 // free - return address is the line after the last instruction
762 push __7 // free
763 push r_arp // free
764 jumpI -> memfree // free

23
doc/memlib-objects.gv Normal file
View File

@ -0,0 +1,23 @@
// Header layouts used by memlib: an allocated object (refcount, size, data)
// next to two free blocks (next-free, size, data) chained into the free list.
digraph {
rankdir=LR
node[shape=record]
// allocated object
obj1[label="<cnt>refcount: 1
|size: 8
|...
",xlabel="object"]
// free blocks
free1[label="<next>next-free
|size: 12
|...
",xlabel="free block"]
free2[label="<next>next-free
|size: 200
|...
",xlabel="free block"]
// invisible edge: only influences the placement of the object relative to the free blocks
obj1:cnt->free1:next[style=invis]
// free list: free1 -> free2 -> null
free1:next->free2:next->{null1[shape=point]}
}

BIN
doc/memlib-objects.pdf Normal file

Binary file not shown.

43
doc/memlib-onealloc.gv Normal file
View File

@ -0,0 +1,43 @@
// Memory after one allocation: the free pointer at address 0, an 8-byte object
// whose header starts at address 4, and one free block starting at address 20.
digraph {
// memory cells, one port per 4-byte address
mem[label=<<table>
<tr><td port="a0">0</td><td port="a1">4</td><td port="a2">8</td><td port="a3">12</td><td port="a4">16</td><td port="a5">20</td><td port="a6">24</td><td port="a7">28</td><td port="a8">32</td><td port="a9">36</td><td port="a10">40</td><td port="a11">44</td><td port="a12">48</td><td port="a13">52</td><td port="a14">56</td><td port="a15">60</td><td port="a16">64</td><td port="a17">68</td><td port="a18">72</td><td port="a19">76</td><td port="a20">80</td><td>...</td><td port="stack"><i>stack</i></td></tr>
</table>>, shape=plaintext, xlabel="memory"]
// free list: free pointer -> free block -> null
{
rank=same
free0[label=<<table>
<tr><td port="in">next-free</td><td port="next">20</td></tr>
</table>>, shape=plaintext, xlabel="free pointer"]
free1[label=<<table>
<tr><td port="in">next-free</td><td port="next">0</td></tr>
<tr><td>size</td><td>52</td></tr>
<tr><td colspan="2"><i>...</i></td></tr>
</table>>, shape=plaintext, xlabel="free block"]
null1[shape=point]
}
// allocated objects
{
rank=same
object1[label=<<table>
<tr><td port="in">ref. count</td><td>1</td></tr>
<tr><td>size</td><td>8</td></tr>
<tr><td colspan="2"><i>data</i></td></tr>
</table>>, shape=plaintext, xlabel="object"]
}
// brk marks address 80, sp points into the stack area
brk->mem:a20
sp->mem:stack
// which structure starts at which address
mem:a0->free0:in
mem:a1->object1:in
mem:a5->free1:in
// free-list links
free0:next->free1:in
free1:next->null1
// invisible edge: only influences the layout
free1->object1[style=invis]
}

BIN
doc/memlib-onealloc.pdf Normal file

Binary file not shown.

58
doc/memlib-runtime.gv Normal file
View File

@ -0,0 +1,58 @@
// Memory layout with two allocated objects and a fragmented free list:
// objects at addresses 4 and 32, free blocks at addresses 20 and 52.
digraph {
// memory strip: one cell per address 0, 4, ..., 80 (ports a0..a20) plus a stack cell
mem[label=<<table>
<tr><td port="a0">0</td><td port="a1">4</td><td port="a2">8</td><td port="a3">12</td><td port="a4">16</td><td port="a5">20</td><td port="a6">24</td><td port="a7">28</td><td port="a8">32</td><td port="a9">36</td><td port="a10">40</td><td port="a11">44</td><td port="a12">48</td><td port="a13">52</td><td port="a14">56</td><td port="a15">60</td><td port="a16">64</td><td port="a17">68</td><td port="a18">72</td><td port="a19">76</td><td port="a20">80</td><td>...</td><td port="stack"><i>stack</i></td></tr>
</table>>, shape=plaintext, xlabel="memory"]
// free list, drawn on one rank: free pointer -> free1 -> free2 -> null
{
rank=same
// pointer to the first free block (value 20)
free0[label=<<table>
<tr><td port="in">next-free</td><td port="next">20</td></tr>
</table>>, shape=plaintext, xlabel="free pointer"]
// first free block: successor at address 52, size 4
free1[label=<<table>
<tr><td port="in">next-free</td><td port="next">52</td></tr>
<tr><td>size</td><td>4</td></tr>
<tr><td colspan="2"><i>...</i></td></tr>
</table>>, shape=plaintext, xlabel="free block"]
// last free block: no successor (next-free 0), size 20
free2[label=<<table>
<tr><td port="in">next-free</td><td port="next">0</td></tr>
<tr><td>size</td><td>20</td></tr>
<tr><td colspan="2"><i>...</i></td></tr>
</table>>, shape=plaintext, xlabel="free block"]
// null pointer terminating the free list
null1[shape=point]
}
// allocated objects, drawn on their own rank
{
rank=same
// first object: reference count 1, size 8
object1[label=<<table>
<tr><td port="in">ref. count</td><td>1</td></tr>
<tr><td>size</td><td>8</td></tr>
<tr><td colspan="2"><i>data</i></td></tr>
</table>>, shape=plaintext, xlabel="object"]
// second object: reference count 3, size 12
object2[label=<<table>
<tr><td port="in">ref. count</td><td>3</td></tr>
<tr><td>size</td><td>12</td></tr>
<tr><td colspan="2"><i>data</i></td></tr>
</table>>, shape=plaintext, xlabel="object"]
}
// brk marks the cell at address 80, sp marks the stack cell
brk->mem:a20
sp->mem:stack
// what is stored at each address:
// 0 = free pointer, 4 = object1, 20 = free1, 32 = object2, 52 = free2
mem:a0->free0:in
mem:a1->object1:in
mem:a5->free1:in
mem:a8->object2:in
mem:a13->free2:in
// free list links
free0:next->free1:in
free1:next->free2:in
free2:next->null1
// invisible edge: draws nothing, only influences node placement
free1->object1[style=invis]
}

BIN
doc/memlib-runtime.pdf Normal file

Binary file not shown.

30
doc/memlib-start.gv Normal file
View File

@ -0,0 +1,30 @@
// Initial memory layout: the free pointer at address 0 refers to a single free
// block at address 4 with size 68; no objects are allocated yet.
digraph {
// memory strip: one cell per address 0, 4, ..., 80 (ports a0..a20) plus a stack cell
mem[label=<<table>
<tr><td port="a0">0</td><td port="a1">4</td><td port="a2">8</td><td port="a3">12</td><td port="a4">16</td><td port="a5">20</td><td port="a6">24</td><td port="a7">28</td><td port="a8">32</td><td port="a9">36</td><td port="a10">40</td><td port="a11">44</td><td port="a12">48</td><td port="a13">52</td><td port="a14">56</td><td port="a15">60</td><td port="a16">64</td><td port="a17">68</td><td port="a18">72</td><td port="a19">76</td><td port="a20">80</td><td>...</td><td port="stack"><i>stack</i></td></tr>
</table>>, shape=plaintext, xlabel="memory"]
// free list, drawn on one rank: free pointer -> free block -> null
{
rank=same
// pointer to the first free block (value 4)
free0[label=<<table>
<tr><td port="in">next-free</td><td port="next">4</td></tr>
</table>>, shape=plaintext, xlabel="free pointer"]
// the only free block: no successor (next-free 0), size 68
free1[label=<<table>
<tr><td port="in">next-free</td><td port="next">0</td></tr>
<tr><td>size</td><td>68</td></tr>
<tr><td colspan="2"><i>...</i></td></tr>
</table>>, shape=plaintext, xlabel="free block"]
// null pointer terminating the free list
null1[shape=point]
}
// brk marks the cell at address 80, sp marks the stack cell
brk->mem:a20
sp->mem:stack
// what is stored at each address: 0 = free pointer, 4 = free block
mem:a0->free0:in
mem:a1->free1:in
// free list links
free0:next->free1:in
free1:next->null1
}

BIN
doc/memlib-start.pdf Normal file

Binary file not shown.

99
doc/nestedArray.iloc.txt Normal file
View File

@ -0,0 +1,99 @@
170 loadI 0 => r_nul // initialise zero register
171 loadI 176 => r_arp // malloc
172 push r_arp // malloc
173 loadI 20 => r_arp // malloc
174 push r_arp // malloc
175 jumpI -> memalloc // malloc
176 pop => r_arp // malloc
177 addI r_arp,16 => r_arp // construct main AR
178 loadI 183 => __1 // malloc
179 push __1 // malloc
180 loadI 8 => __1 // malloc
181 push __1 // malloc
182 jumpI -> memalloc // malloc
183 pop => __1 // malloc
184 loadI 189 => __2 // malloc
185 push __2 // malloc
186 loadI 8 => __2 // malloc
187 push __2 // malloc
188 jumpI -> memalloc // malloc
189 pop => __2 // malloc
190 loadI 1 => __3 // 1
191 storeAI __3 => __2,0 // store array element
192 loadI 2 => __3 // 2
193 storeAI __3 => __2,4 // store array element
194 storeAI __2 => __1,0 // store array element
195 loadI 200 => __2 // malloc
196 push __2 // malloc
197 loadI 8 => __2 // malloc
198 push __2 // malloc
199 jumpI -> memalloc // malloc
200 pop => __2 // malloc
201 loadI 3 => __3 // 3
202 storeAI __3 => __2,0 // store array element
203 loadI 4 => __3 // 4
204 storeAI __3 => __2,4 // store array element
205 storeAI __2 => __1,4 // store array element
206 addI r_arp,0 => __2 // add offset
207 load __2 => __3 // load reference
208 cmp_EQ __3,r_nul => __4 // remove old reference
209 cbr __4 -> ynul0,nnul1 // remove old reference
210 nnul1: nop // remove old reference
211 loadI 215 => __4 // free
212 push __4 // free
213 push __3 // free
214 jumpI -> memfree // free
215 ynul0: nop // remove old reference
216 store __1 => __2 // to matrix
217 load __2 => __3 // load reference
218 loadI 222 => __5 // memaddref
219 push __5 // memaddref
220 push __3 // memaddref
221 jumpI -> memaddref // memaddref
222 cmp_EQ __1,r_nul => __3 // remove old reference
223 cbr __3 -> ynul2,nnul3 // remove old reference
224 nnul3: nop // remove old reference
225 loadI 229 => __3 // free
226 push __3 // free
227 push __1 // free
228 jumpI -> memfree // free
229 ynul2: nop // remove old reference
230 addI r_arp,0 => __2 // add offset
231 load __2 => __2 // get array object
232 loadI 0 => __3 // 0
233 loadAI __2,-4 => __1 // check array index
234 divI __1,4 => __1 // check array index
235 cmp_LT __3,__1 => __1 // check array index
236 cmp_GE __3,r_nul => __4 // check array index
237 and __1,__4 => __4 // check array index
238 cbr __4 -> nob5,oob4 // check array index
239 oob4: haltI 1634692962 // array index out of bounds
240 nob5: multI __3,4 => __3 // multiply index by size
241 add __2,__3 => __2 // get array index address
242 load __2 => __2 // get array object
243 loadI 1 => __4 // 1
244 loadAI __2,-4 => __3 // check array index
245 divI __3,4 => __3 // check array index
246 cmp_LT __4,__3 => __3 // check array index
247 cmp_GE __4,r_nul => __1 // check array index
248 and __3,__1 => __1 // check array index
249 cbr __1 -> nob7,oob6 // check array index
250 oob6: haltI 1634692962 // array index out of bounds
251 nob7: multI __4,4 => __4 // multiply index by size
252 add __2,__4 => __2 // get array index address
253 load __2 => __2 // load address
254 out "",__2 //
255 loadAI r_arp,0 => __4 // remove reference get var
256 cmp_EQ __4,r_nul => __1 // remove reference
257 cbr __1 -> ynul8,nnul9 // remove reference
258 nnul9: nop // remove reference
259 loadI 263 => __1 // free
260 push __1 // free
261 push __4 // free
262 jumpI -> memfree // free
263 ynul8: nop // remove reference
264 subI r_arp,16 => r_arp // deconstruct main AR
265 loadI 269 => __2 // free
266 push __2 // free
267 push r_arp // free
268 jumpI -> memfree // free

View File

@ -0,0 +1,3 @@
Two instructions have been added to the ILOC architecture: \verb|halt| taking one register as input and \verb|haltI| taking one number as input. These instructions were added to aid in debugging, to allow a program to signal unrecoverable failure and to provide software interrupts in general. After executing a halt instruction, the interrupt value will be accessible in \verb|Machine::getInterrupt| and can be reset with \verb|Machine::clearInterrupt|. A non-zero interrupt value prevents the \verb|Simulator::run| method from executing any instruction. \verb|Simulator::step| will still execute as usual.\\
One register has been added: \verb|brk|, named after the Unix functions \emph{brk} and \emph{sbrk}, which manage the size of the data segment of a program. This value marks the end of dedicated heap space and is initialised to the halfway point of the total memory of the virtual machine. The \verb|Simulator| is modified to throw an exception if the stack pointer goes below this address. \emph{memlib} uses this register to determine the amount of free space.

View File

@ -1,29 +1,18 @@
This report summarizes the Boppi language and default implementation. It describes the features of the language both in terms of usage and in terms of formal requirements. The Java implementation of the compiler chain is elaborated as well as the standard libraries written in ILOC. Together with the discussion of problems encountered during the project, these give a thorough insight into the process of making this project and the end result.\\
Boppi features a basic expression language, variables, closures and arrays. The addition of closures added a lot of value to both Boppi and the understanding of programming languages. Moreover, together with string (character array) input and output, the language can be used for rudimentary programs.
\section{Future work}
\label{future-work}
The compiler can be improved by separating ANTLR's abstract syntax tree from type checking and code generation. For the latter this reduces code duplication and allows more insight into what data is necessary for a set of instructions. Because tree rewriting is not possible in ANTLR 4, this seems to be the optimal way. The binary size can also be improved by combining jump targets, by not generating unreachable code and by jumping to functional instructions rather than generating NOP instructions. The last may be accomplished by annotating the AST with the entry point of a node.\\
% verslag-samenvatting.tex
Furthermore, the compiler can be improved by separating call frames from their closure. While closures are in general a graph, call frames always form a stack. Keeping the call frames on the stack may improve readability of the Java code and the ILOC code and state of the machine while debugging.\\
The language can be improved with type inference, notably using the Hindley-Milner (\emph{HM}) type system. The inference mechanism for HM runs in linear time and is relatively easy to implement, especially with the \verb|checkConstraint| method already in place. However, it requires the implementation of variables on the type level. Moreover, it has to give meaningful errors if a unification fails or if free types are left after checking. This feature goes hand in hand with polymorphism because of these type variables.\\
% verslag-problemen.tex
The language can be improved with polymorphism. This could be done best by boxing all values by default and only unboxing (primitive) values through an optimization pass. Moreover, it would be good practice to include type information during runtime to support polymorphic recursion and to simplify code generation for all forms of polymorphism.\\
The garbage collector can be improved by checking for cycles in references. A function reference and its enclosing function reference each other, so they may continue to exist after the enclosing function has ended even after nothing can access the function's local variables. One approach is to periodically recursively iterate over all references starting at the global scope marking them all, then for each object in memory delete it if it is unmarked or simply unmark it if it was marked (a \emph{tracing garbage collector}). If most references are non-circular, it makes sense to use both the current garbage collector and a tracing garbage collector side by side.\\
% verslag-beschrijving.tex
% verslag-software.tex
% verslag-tests.tex
% verslag-conclusies.tex
% verslag-grammatica.tex
% verslag-walker.tex
% verslag-testprogramma.tex
Moreover, the deallocation of reference types (both variables and closures) ought to be implemented as a small subroutine rather than as inlined code at every occurrence. This would improve the size of the binary and improve the correctness of the garbage collection. Currently, the garbage collector can either leak memory and handle closures correctly, or it can eagerly deallocate memory and break closures. Also, the garbage collector does not collect reference types within arrays.\\
Lastly, the \emph{memlib} can be improved by adding the ability to change the \verb|brk| value dynamically or, when there is too little memory available, by signalling the VM to resize the memory and retrying the allocation. Also, the library can be improved by providing a reallocation and a copy function.

View File

@ -6,151 +6,790 @@
% Code generation: what kind of target code is generated for the feature?
% You may make use of your ANTLR grammar as a basis for this description, but note that not every
% rule necessarily corresponds to a language feature.
This section describes the language features of Boppi. The tokens used in the syntax listings and explanation can be found in \cref{report-grammar}. Comments and whitespace are discarded by the lexer, so they are not listed here.
\section{Basic expressions}
At the heart of the language are basic arithmetic, logic and compound expressions and literals. These support basic operations for boolean, integer and character types.
\subsection{Basic expressions}
At the very core of the language are basic arithmetic, logic and compound expressions and literals for fundamental types.
\paragraph{Syntax}
The basic expression language consists of a \verb|program| that contains a sequence of statements separated by semicolons, \verb|stats|. A single \verb|stat|ement in the basic language can only be an expression, which can be an arithmetic or logical expression, an integer, a character, a boolean or another sequence of statements. The ANTLR parser tree can be seen in \cref{basic-syntax}.
\begin{figure}
\caption{ANTLR4 code of the basic expression language of Boppi}
\label{basic-syntax}
\begin{minted}{antlr}
program: stats EOF;
\subsubsection{Syntax}
\begin{minted}{antlr}
program: expr EOF;
expr
: singleExpr (COMPOUND singleExpr?)*
stats
: stat (COMPOUND stat?)*
;
singleExpr
: PAROPEN expr PARCLOSE #parens
| BRAOPEN expr BRACLOSE #block
| op=(PLUS|MINUS|NOT) singleExpr #prefix1
| lhs=singleExpr op=(MULTIPLY|DIVIDE) rhs=singleExpr #infix1
| lhs=singleExpr op=(PLUS|MINUS) rhs=singleExpr #infix2
| lhs=singleExpr op=(LT|LEQ|GTE|GT|EQ|NEQ) rhs=singleExpr #infix3
| lhs=singleExpr AND rhs=singleExpr #infix4
| lhs=singleExpr OR rhs=singleExpr #infix5
stat
: ...
| expr
;
expr
: ...
| PAROPEN stats PARCLOSE #parens
| BRAOPEN stats BRACLOSE #block
| op=(PLUS|MINUS|NOT) expr #prefix1
| lhs=expr op=(MULTIPLY|DIVIDE) rhs=expr #infix1
| lhs=expr op=(PLUS|MINUS) rhs=expr #infix2
| lhs=expr op=(LT|LEQ|GTE|GT|EQ|NEQ) rhs=expr #infix3
| lhs=expr AND rhs=expr #infix4
| lhs=expr OR rhs=expr #infix5
| LITERAL10 #literalInteger
| CHAR #literalCharacter
| (TRUE|FALSE) #literalBoolean
;
\end{minted}
\end{minted}
\end{figure}
\paragraph{Examples}
\begin{minted}{boppi}
See \cref{basic-examples} for a few examples of basic expressions. Since input and output is only introduced in \cref{io-section}, the result of an expression is simply written in the comments.
\begin{figure}
\caption{Basic expressions in Boppi}
\label{basic-examples}
\begin{minted}{boppi}
5; //an integer literal as a statement
'c';;;; //a character literal followed by empty statements
4+(2*3/-1); //an arithmetic expression (result: -2)
{
true && false; //an expression with two boolean literals (result: false)
3 //the last expression in a block is the value of the block (result: 3)
}+4; //another arithmetic expression (result: 7)
\end{minted}
\end{figure}
\paragraph{Use}
\verb|#prefix1|, \verb|#infix1| and \verb|#infix2| restrict their operand types and result type to integers. \verb|#infix3| restricts its operands to integers and sets the result type to boolean. However, when the operator is \verb|EQ| or \verb|NEQ|, the operands can be of any type as long as both have the same type. The resulting type will still be a boolean. \verb|#infix4| and \verb|#infix5| only allow booleans as operands and, again, return a boolean.
\paragraph{Semantics}
A program consists of one compound expression.
\verb|COMPOUND| separates single expressions and evaluates them left to right. The rightmost expression is the return value of the expression and other return values are discarded.
Both \verb|#parens| and \verb|#block| contain a compound expression, however \verb|#block| introduces a deeper scope for the expression within, as clarified in \cref{variables-section}.
\paragraph{Code generation}
The literals generate a \verb|loadI| instruction. For character literals, the \verb|loadI| is followed by \verb|i2c| to make sure the loaded character is within the character range of the ILOC VM.
Operators first generate their operands, locking the result register, then generate their operator instruction and free the registers.
The compound expression simply generates its inner expressions in order. See \cref{basic-codegen} for an example of generated code.
\begin{figure}
\begin{subfigure}{0.2\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
5;
4+(2*3/-1);
'c';;;;
4+(2*3/-1);
{
true && false;
3
}+4;
\end{minted}
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.7\textwidth}
\caption{Generated ILOC}
\begin{minted}{boppi}
loadI 5 => __1 // 5
\subsubsection{Use}
\verb|#prefix1|, \verb|#infix1| and \verb|#infix2| restrict their operand types and result type to integers. \verb|#infix3| restricts their operand types to integers and sets its result type to boolean, except when the operator is EQ or NEQ, in which case the operands can be of any type as long as both have the same type. \verb|#infix4| and \verb|#infix5| restrict their operand types and result type to booleans.
loadI 99 => __1 // 'c'
i2c __1 => __1 // 'c'
\subsubsection{Semantics}
A program consists of one compound expression.
COMPOUND evaluates two expressions, discarding the result of the left hand side and passing the result of the right hand.
Both \verb|#parens| and \verb|#block| contain a compound expression, however \verb|#block| introduces a deeper scope for the expression within, as clarified in the next feature.
loadI 4 => __1 // 4
loadI 2 => __2 // 2
loadI 3 => __3 // 3
mult __2,__3 => __2 // *
loadI 1 => __3 // 1
rsubI __3,0 => __3 // unary -
div __2,__3 => __2 // /
add __1,__2 => __1 // +
\subsubsection{Code generation}
The compound expression simply generates its inner expressions in order, the literals generate a \verb|loadI| instruction and the operators generate a single corresponding instruction.
loadI 1 => __2 // true
loadI 0 => __1 // false
and __2,__1 => __2 // &&
loadI 3 => __1 // 3
loadI 4 => __2 // 4
add __1,__2 => __1 // +
\end{minted}
\end{subfigure}
\caption{Generated code for basic expressions in Boppi.}
\label{basic-codegen}
\end{figure}
\subsection{Variables}
\subsubsection{Syntax}
\begin{minted}{antlr}
singleExpr
\section{Variables}
\label{variables-section}
\paragraph{Syntax}
Variables introduce two more \verb|stat|ement types: declarations and assignments. A declaration introduces a variable name and restricts it to a certain type. The type can be either one of the three built-in types (boolean, character, integer) or a previously declared identifier. The optional \verb|CONST| keyword can be used to declare a variable constant, so it may only be assigned once. Secondly, an assignment sets the value of a variable to the result of an expression. Lastly, a variable can be used in an expression if it has been assigned. See \cref{variables-syntax} for the additional ANTLR parse tree elements.
\begin{figure}
\caption{ANTLR4 code for variables in Boppi.}
\label{variables-syntax}
\begin{minted}{antlr}
stat
: ...
| DECLARE type IDENTIFIER #declare
| <assoc=right> variable ASSIGN singleExpr #assign
| variable #variableExpr
| declareStat
| assignStat
;
declareStat
: ...
| DECLARE CONSTANT? type IDENTIFIER #declare
;
assignStat
: variable ASSIGN (assignStat | expr) #assign
;
expr
: ...
| variable #getVariable
;
type
: staticType=(INTTYPE | BOOLTYPE | CHARTYPE) #typeSimple
: ...
| staticType=(INTTYPE | BOOLTYPE | CHARTYPE) #typeSimple
| variable #typeVariable
;
variable: IDENTIFIER;
\end{minted}
variable
: ...
| IDENTIFIER #variableSimple
;
\end{minted}
\end{figure}
\paragraph{Examples}
\begin{minted}{boppi}
var int myInt;
myInt := 4;
See \cref{variables-example} for some examples of variable usage. Again, input and output is introduced in \cref{io-section}, so the result of an expression is listed in the comments.
\begin{figure}
\caption{Example code with variables in Boppi.}
\label{variables-example}
\begin{minted}{boppi}
var int myInt; //declares integer myInt (result: void)
myInt := 4; //myInt is now 4 (result: 4)
var myInt otherInt;
otherInt := 4+(myInt := 2);
var myInt otherInt; //declares otherInt with the same type myInt
otherInt := 4+(myInt := 2); //myInt is now 2 and otherInt is 6 (result: 6)
var bool aBool;
var bool aBool; //declares boolean aBool
aBool := {
var int otherInt;
otherInt := 12;
myInt > otherInt
};
\end{minted}
var const int otherInt; //declares a constant integer otherInt inside this block
otherInt := 12; //otherInt is now 12 (result: 12)
myInt > otherInt //(result: false)
}; //aBool is now false
\end{minted}
\end{figure}
\subsubsection{Use}
A variable can only be assigned and used after it has been declared and within the same scope or a deeper scope than where it is declared. A variable cannot be declared if the name is already used by another variable in the same scope.
\paragraph{Use}
A variable must be declared before it can be used and its type must be given in the declaration. Moreover, a variable can only be used within an expression once it has been assigned a value. The result of an expression can only be assigned to a variable if the types match.\\
Regarding lexical scopes, a variable only exists in the scope in which it is declared and deeper scopes. A variable can be redeclared, effectively hiding the original variable, in a deeper scope, linking the name to a new variable. The type of the redeclared variable does not have to match the original type.
\subsubsection{Semantics}
\verb|#declare| introduces a new variable to the current scope. This variable will be undeclared once the current scope is closed. The variable will be stored at an offset within the AR, thereby increasing the size of the AR.
\verb|#assign| evaluates the
\paragraph{Semantics}
\verb|#declare| introduces a new variable to the current scope. It does not perform any action during runtime, but the identifier and its type are recorded in a symbol table during compilation. Moreover, the compiler allocates a space for the variable in the local data segment of the main AR. The identifier and its allocated space will be freed once the current scope is closed.\\
\verb|#assign| first evaluates the expression on the right hand side and then stores the result at the space allocated for that variable.\\
\verb|#getVariable| retrieves the value of a variable from the allocated space.
\subsubsection{Code generation}
%wat voor ILOC wordt er gegenereerd voor deze functie?
\paragraph{Code generation}
Declarations do not generate any code. An assignment generates \verb|addI r_arp,k => r| followed by \verb|store r => r| (\verb|cstore r => r| for characters) for some register \verb|r|. The local offset, \verb|k|, is decided by the symbol table. The reason for calculating an address and using a plain \verb|store| instead of using \verb|storeAI| directly, is because the address is calculated differently for different kinds of variables. Likewise, a \emph{use} of a simple variable generates \verb|addI r_arp,k => r| followed by \verb|load r => r| (\verb|cload r => r| for characters).\\
An example of (chained) assignments, uses and offsets can be seen in \cref{variables-code}. As can be seen in the generated ILOC code, the variables x, y, unused, b and c have the respective offsets of 0, 4, 8, 9 and 13.
\begin{figure}
\begin{subfigure}{0.2\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
var int x;
var int y;
x := 4;
y := 3+x;
var char unused;
var bool b;
var bool c;
c := b := x < y;
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.7\textwidth}
\caption{Generated ILOC}
\begin{minted}{boppi}
loadI 4 => __1 // 4
addI r_arp,0 => __2 // add offset
store __1 => __2 // to x
loadI 3 => __1 // 3
addI r_arp,0 => __2 // add offset
load __2 => __2 // load address
add __1,__2 => __1 // +
addI r_arp,4 => __2 // add offset
store __1 => __2 // to y
addI r_arp,0 => __1 // add offset
load __1 => __1 // load address
addI r_arp,4 => __2 // add offset
load __2 => __2 // load address
cmp_LT __1,__2 => __1 // <
addI r_arp,9 => __2 // add offset
store __1 => __2 // to b
addI r_arp,13 => __2 // add offset
store __1 => __2 // to c
\end{minted}
\end{subfigure}
\caption{Generated code for basic variable use in Boppi.}
\label{variables-code}
\end{figure}
\subsection{Input/Output}
\subsubsection{Syntax}
\begin{minted}{antlr}
singleExpr
\section{Input/Output}
\label{io-section}
\paragraph{Syntax}
I/O introduces two expression types: input and output. In an input expression, a sequence of variables is provided and in an output expression a sequence of expressions can be provided. The ANTLR rules can be seen in \cref{io-syntax}.
\begin{figure}
\caption{ANTLR4 code for I/O in Boppi.}
\label{io-syntax}
\begin{minted}{antlr}
singleExpr
: ...
| IN PAROPEN variable (LISTDELIM variable)* PARCLOSE #read
| OUT PAROPEN expr (LISTDELIM expr)* PARCLOSE #write
;
\end{minted}
\end{minted}
\end{figure}
\paragraph{Examples}
\begin{minted}{boppi}
See \cref{io-example} for the basic use of input and output expressions.
\begin{figure}
\begin{subfigure}{0.5\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
var int anInt;
read(anInt);
print('a');
print(read(anInt)+4);
var bool aBool;
aBool := true;
print(aBool);
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.4\textwidth}
\caption{input and output}
\begin{minted}{text}
> 4
<<< a
> 8
<<< 12
<<< true
\end{minted}
\end{subfigure}
\caption{Example code for I/O in Boppi.}
\label{io-example}
\end{figure}
\end{minted}
\paragraph{Use}
Input expressions can only contain simple variables as arguments. If there is exactly one variable present, the type of the variable and its value will be passed out of the expression. Otherwise the result of the expression is \verb|void|.\\
Output expressions can only contain non-void expressions as arguments. Analogous to the input expression, when exactly one argument is provided, its type and value are passed out of the expression; otherwise \verb|void| is returned.
\subsubsection{Use}
%hoe gebruik je het? wat zijn de typerestricties?
\paragraph{Semantics}
The input expression stores a value in every variable argument by reading each value from the standard input. When exactly one variable is present, the result of the expression is that value, otherwise it is void.\\
The output expression prints the result of each expression to the standard output. Analogous to the input expression, when exactly one argument is given, this will be the result of the expression.\\
If a read or print action is undefined for a type, it will halt the machine with the status \emph{ERROR\_INPUT\_UNKNOWN\_TYPE} respectively \emph{ERROR\_OUTPUT\_UNKNOWN\_TYPE}.
\subsubsection{Semantics}
%semantiek
\paragraph{Code generation}
When printing expressions, the generator evaluates each expression and then prints it to the standard output. For printing an integer, the generator simply produces \verb|out "", r| where \verb|r| is the register holding the value of the current expression. For printing a character, the character is pushed onto the stack as a string and then printed using \verb|cout|, see \cref{character-output}. For printing a boolean, the generator calls a subroutine to print either \emph{true} or \emph{false} to the standard output. The subroutine can be seen in \cref{boolean-output}. For more information about the subroutine calling convention, see \cref{memlib-calling}.\\
When reading values to variables, the generator first reads from the standard input and then stores the value similarly to the assign statement. In case of booleans and integers, the generator simply produces \verb|in "" => r|. In case of a character, the generator calls a subroutine for reading a line and extracting exactly one character. The subroutine can be seen in \cref{character-input}. It reads a whole line at a time as per the \verb|cin| instruction. Empty lines are discarded and the first character of a non-empty line is returned. For the calling convention, again see \cref{memlib-calling}.
\subsubsection{Code generation}
%wat voor ILOC wordt er gegenereerd voor deze functie?
\begin{figure}
\caption{ILOC for printing a single character stored in register r.}
\label{character-output}
\begin{minted}{boppi}
cpush r
loadI 1 => r_t
push r_t
cout ""
\end{minted}
\end{figure}
\begin{figure}
\caption{\emph{stdlib} ILOC for writing a boolean.}
\label{boolean-output}
\begin{minted}{boppi}
// write a boolean to output
// stack: [return address, bool] -> []
stdbout: pop => m_1 // get boolean
loadI 0 => m_2 // load zero-length string
push m_2
cbr m_1 -> sbout_t,sbout_f
sbout_t: cout "true"
jumpI -> sbout_e
sbout_f: cout "false"
sbout_e: pop => m_1 // load return address
jump -> m_1
\end{minted}
\end{figure}
\begin{figure}
\caption{\emph{stdlib} ILOC for reading a single character.}
\label{character-input}
\begin{minted}{boppi}
// read a character from input
// stack: [return address] -> [char]
stdcin: cin "" // get line
pop => m_1 // get length
cbr m_1 -> scin_t,stdcin // repeat until at least one character
scin_t: cpop => m_2 // save character
scin_lc: subI m_1, 1 => m_1 // decrement char count
cbr m_1 -> scin_ll,scin_le
scin_ll: cpop => m_0 // discard character
jumpI -> scin_lc // repeat
scin_le: loadI 0 => m_0 // reset zero register
pop => m_1 // get return address
cpush m_2 // push result character
jump -> m_1
\end{minted}
\end{figure}
\subsection{}
\subsubsection{Syntax}
\begin{minted}{antlr}
\end{minted}
\section{Conditional code}
\label{conditionals}
\paragraph{Syntax}
Conditionals extend the Boppi language with two expression types. The \verb|#if| expression has an optional \verb|ELSE| clause. The extra syntax can be seen in \cref{conditional-syntax}.
\begin{figure}
\caption{ANTLR4 code for conditionals in Boppi.}
\label{conditional-syntax}
\begin{minted}{antlr}
expr
: ...
| IFOPEN cond=stats IFTRUE onTrue=stats (IFFALSE onFalse=stats)? IFCLOSE #if
| WHILEOPEN cond=stats WHILETRUE onTrue=stats WHILECLOSE #while
\end{minted}
\end{figure}
\paragraph{Examples}
\begin{minted}{boppi}
\end{minted}
A few examples of \emph{if} and \emph{while} expressions can be seen in \cref{conditional-example}. The first \emph{if} construction shows how it can be used as an expression with a result, in this case either the character \verb|T| or \verb|F|. Next is an \emph{if} construction with only a consequent and no alternative. Lastly two \emph{while} constructions are presented to show the use of scoped variables and looping.
\begin{figure}
\begin{subfigure}{0.6\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
var int x;
print(if read(x) > 4 then 'T' else 'F' fi);
\subsubsection{Use}
%hoe gebruik je het? wat zijn de typerestricties?
if x == 8 then
print('H','i')
fi;
\subsubsection{Semantics}
%semantiek
while var bool cont; read(cont) do
var int y;
y := x;
x := print(y+x);
od;
\subsubsection{Code generation}
%wat voor ILOC wordt er gegenereerd voor deze functie?
var int i;
var int n;
i := 1;
n := 0;
while i < x do
n := n+i;
i := i+1;
od;
print(n);
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.3\textwidth}
\caption{input and output}
\begin{minted}{text}
> 8
<<< T
<<< H
<<< i
> 1
<<< 16
> 1
<<< 32
> 0
<<< 496
\end{minted}
\end{subfigure}
\caption{Example of conditionals in Boppi.}
\label{conditional-example}
\end{figure}
\paragraph{Use}
The \emph{if} expression can freely be used with and without an alternative (\verb|ELSE|) and the result types of the consequent and the alternative can be of any type. The condition has to result in a boolean type and the whole expression will generally return \emph{void}. However, when the consequent and alternative result in the same type, the \emph{if} expression will have this return type. Then, when executed, the expression will return the result of the branch taken.\\
The \emph{while} expression has one form that also requires the condition to have a boolean type and allows the body to have any type. The expression will always return \verb|void|.
\paragraph{Semantics}
The \emph{if} expression first executes the condition and executes the consequent only if the condition is \verb|true|. If the condition is \verb|false| and there is an alternative, the alternative will be executed.\\
The \emph{while} expression executes the condition and executes the body if the condition is \verb|true|. Also, if the condition is \verb|true|, it will then repeat the while expression.
\paragraph{Code generation}
The \emph{if} statement generates two or three jump targets, depending on whether an alternative clause is present. In both cases the condition is visited followed by a \verb|cbr| instruction based on the value of the condition.\\
When no alternative is present, the \verb|cbr| jumps to either the \verb|if_t| or \verb|if_e| target. A \verb|if_t: nop| is produced immediately afterwards, after which the consequent is visited. Lastly a \verb|if_e: nop| is produced. Effectively the consequent is skipped when the condition is false.\\
When an alternative is present, the \verb|cbr| jumps to either the \verb|if_t| or \verb|if_f| target, after which an \verb|if_t: nop| is produced and the consequent is visited. If the expression is to return a value, an \verb|i2i r1 => r2| is produced to copy the result of the consequent to the result register. Then a \verb|jumpI -> if_e| is produced to skip the alternative. Next, for the alternative, an \verb|if_f: nop| is produced and the alternative is visited, again followed by an \verb|i2i| if the expression is to return a value. Lastly an \verb|if_e: nop| is produced. \cref{if-code} is an example of an \emph{if} expression with three targets and a return value.\\
The \emph{while} loop has three jump targets, one for evaluating the condition, one for executing the body of the loop and one for breaking out of the loop. The compiler first produces a \verb|jumpI -> while_f| to jump to the condition. Then a \verb|while_t: nop| is produced as a jump target followed by the loop body. Next, a jump target for the condition, \verb|while_f: nop|, and the condition and \verb|cbr r_k -> while_t,while_e| are produced, where \verb|r_k| is the register that holds the result of the condition. Finally, the breaking jump target \verb|while_e: nop| is produced. See \cref{while-code} for an example.
\begin{figure}
\begin{subfigure}{0.2\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
if 2 > 1 then
'T'
else
'F'
fi
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.7\textwidth}
\caption{Generated ILOC}
\begin{minted}{boppi}
loadI 2 => __1 // 2
loadI 1 => __2 // 1
cmp_GT __1,__2 => __1 // >
cbr __1 -> if_t0,if_f1 //
if_t0: nop //
loadI 84 => __2 // 'T'
i2c __2 => __2 // 'T'
i2i __2 => __1 // result
jumpI -> if_e2 //
if_f1: nop //
loadI 70 => __2 // 'F'
i2c __2 => __2 // 'F'
i2i __2 => __1 // result
if_e2: nop //
\end{minted}
\end{subfigure}
\caption{Generated code for an if expression in Boppi.}
\label{if-code}
\end{figure}
\begin{figure}
\begin{subfigure}{0.2\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
while true do
1
od
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.7\textwidth}
\caption{Generated ILOC}
\begin{minted}{boppi}
jumpI -> while_f1 // to condition
while_t0: nop // loop target
loadI 1 => __1 // 1
while_f1: nop // condition target
loadI 1 => __1 // true
cbr __1 -> while_t0,while_e2 //
while_e2: nop // end target
\end{minted}
\end{subfigure}
\caption{Generated code for a while expression in Boppi.}
\label{while-code}
\end{figure}
\section{Functions}
\label{functions}
\paragraph{Syntax}
Functions introduce one new mode of declaration and one new mode of expression. The declaration of a function takes a name, an optional sequence of parameters and an optional return value. These parameters each have a type and a name. A function call requires a variable (the name of the function) followed by a sequence of expressions between parentheses. Moreover, the feature introduces the arrow and tuple at the type level, so function types can be constructed. The arrow denotes a function from the (tuple) type on the left to the type on the right, whereas the tuple is a sequence of types. The ANTLR rules can be seen in \cref{functions-syntax}.
\begin{figure}
\caption{ANTLR4 code for functions in Boppi.}
\label{functions-syntax}
\begin{minted}{antlr}
declareStat
: ...
| FUNCTION (result=type)? name=IDENTIFIER PAROPEN parameters? PARCLOSE body=expr #declareFunction
expr
: ...
| variable PAROPEN (expr (LISTDELIM expr)*)? PARCLOSE #call
parameters
: ...
| type IDENTIFIER (LISTDELIM type IDENTIFIER)*
type
: ...
| type ARROW type #typeFunction
| PAROPEN (type (LISTDELIM type)*)? PARCLOSE #typeTuple
\end{minted}
\end{figure}
\paragraph{Examples}
Functions can be used in many ways in Boppi. They can have any number of arguments and a return value is optional. Since Boppi allows for side effects, functions without a return value have their use. One example is shown in \cref{functions-example}: \verb|logPrint| counts the number of times the function has been called by incrementing the non-local variable \verb|numCalls|. A function with a return value can be used in an expression, as can be seen in the same example with the function \verb|add|.
\begin{figure}
\begin{subfigure}{0.6\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
var int numCalls;
numCalls := 0;
function logPrint(int n) {
numCalls := numCalls+1;
print(n);
};
logPrint(1);
logPrint(2);
print(numCalls);
function int add(int a, int b) a+b;
print(10*add(3, 4));
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.3\textwidth}
\caption{input and output}
\begin{minted}{text}
<<< 1
<<< 2
<<< 2
<<< 70
\end{minted}
\end{subfigure}
\caption{Example of functions in Boppi.}
\label{functions-example}
\end{figure}
\paragraph{Use}
The function declaration has an optional return type, a name, a sequence of parameters in parentheses and an expression that is the body of the function. The sequence of parameters may be empty and each parameter consists of a type and a name. After the declaration, the function variable is marked \emph{constant} and \emph{assigned}. Since the function declaration enters a new scope, the parameter names may override variable names outside the function. Inside the body of the function, the parameters will have a value \emph{assigned}. If the function has a return type, the function body must return this type. Because the function declaration as a whole is a declare statement, it has no result type and must not be the last statement of a block.\\
The function call consists of a name followed by a sequence of expressions in parentheses. The function name must match a variable that has a function type. The number of expressions must match the number of formal parameters of the function. Each expression must be of the same type as the corresponding formal parameter. If the function has a return type, the function call results in this type, otherwise the result type is \verb|void|.\\
A function type must always have an arrow at the top level and a tuple on its left side. A function variable is not initialized at declaration and there is no run-time check during a function call, so the user should heed any warning that a variable may not be assigned. Lastly, there is currently no way to construct a function type with no return value, so variables and parameters can only have these types by copying the type of another function.\\
\paragraph{Semantics}
A function declaration creates a variable in the current scope with the name of the function. The type is constructed as a tuple of the parameter types and the return type. The return type will be \verb|void| if not present. The function variable is marked assigned. The body of the function is not evaluated.\\
A function call first retrieves the function reference and constructs an activation record (\emph{AR}). Then each of the parameters is evaluated and their results are stored within the AR. Finally, the program jumps to the body of the function.\\
The body of a function declaration links all the formal parameters to places within the AR, which are assigned a value by the function call. During a call, first the body of the function is evaluated. Next, if there is a result, it will be stored in the AR. Then, the AR will be dereferenced and, if it is marked for deletion, all local reference variables will be dereferenced. Finally, the control flow is moved back to where the function call was made.\\
At the end of a function call, the result is loaded, if there is any.
\paragraph{Code generation}
In a function declaration, the generator performs the following steps:
\begin{enumerate}
\item Jump over the function body.
\item Visit the inner expression.
\item If the function is to return a value, generate a \verb|storeAI r_res => r_arp, OFFSET_RETURN|, with \verb|r_res| the register that holds the result of the inner expression.
\item Check whether there is only one reference left to the current AR. If so, free (decrement the reference count of) all local variables where applicable.
\item Generate a \verb|loadAI r_arp, OFFSET_RETURN_ADDRESS => r_temp| and \verb|jump -> r_temp| to return to the call site.
\item Allocate a tuple for the target address, current AR and desired AR size.
\item Store the relevant values in that tuple, with the target address and AR size decided at compile-time, while the current AR is decided at run-time.
\item Store the address of the tuple in the function variable's offset.
\item Increment the reference count of the current AR and its parents.
\end{enumerate}
The tuple is required for two reasons. Firstly, because the current AR is relevant in case the function uses non-local variables and is used outside the current function. Secondly, because the AR size is relevant due to \cref{problems-reassigned-closures}. The tuple is referenced rather than stored in place because it is easier to move around a single integer.\\
The references to the AR's parents are incremented because of \cref{problem-closures}.\\
Freeing local reference variables at the end of the function is done as a simple mechanism to clean up memory. A better approach would be to generate cleanup procedures, as discussed in \cref{future-work}.\\
In a function call, the generator performs a number of steps:
\begin{enumerate}
\item Retrieve the function variable.
\item Load the function tuple using \verb|load r_var => r_tuple| with \verb|r_var| the register holding the address of the function variable.
\item Allocate the AR using the size specified in the tuple, \verb|loadAI r_tuple, OFFSET_AR_SIZE => r_narp|.
\item Shift the pointer to the callee AR by \verb|addI r_narp, AR_BASE_SIZE => r_narp| so AR properties have negative offsets and local variables (including parameters) start at an offset of 0.
\item Evaluate each parameter of the function and copy the result to the callee AR using \verb|storeAI r_res => r_narp, c_offset|, with \verb|c_offset| the offset of the formal parameter.
\item Save all registers that are in use by pushing them to the stack.
\item Save the current AR in the callee AR, \verb|storeAI r_arp => r_narp, OFFSET_CALLER_ARP|, to retrieve it after the call.
\item Copy the parent AR that belongs to the function tuple to the callee AR.
\item Increment the reference count to the parent ARs of the callee.
\item Save the return address to the callee AR.
\item Switch to the callee AR.
\item Jump to the target address stored in the function tuple.
\item Decrement the reference count to the callee AR and its parents.
\item Restore all the registers that were in use by popping them from the stack.
\item Load the result of the function, if any.
\item Restore the AR.
\end{enumerate}
Note there is no particular reason why the registers are saved to the stack, while the caller's ARP and return address are saved to the callee's AR.\\
For a simple example of a function declaration and call, see \cref{functions-code}. Because the generated ILOC spans more than 100 lines, it can be found as a separate file.
\begin{figure}
\begin{subfigure}{0.5\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
function int successor(int n)
n+1;
var int x;
read(x);
print(successor(x));
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.5\textwidth}
\caption{Generated ILOC}
See \emph{doc/successor.iloc.txt}. Instructions 178-193 form the body of \verb|successor| with 180-183 generated by the expression in the body, 194-217 form the allocation of \verb|successor| as a variable, 222-270 form a call to \verb|successor| and 272-293 form the deallocation of \verb|successor|.
\end{subfigure}
\caption{Generated code for functions in Boppi.}
\label{functions-code}
\end{figure}
\section{Arrays}
\label{arrays}
\paragraph{Syntax}
Arrays add new syntax in three places in the language. They introduce a way to construct an array type of any type and two ways to construct an array. The first way to construct an array is providing an array literal: \verb|[ element1, element2, ... ]|. The second way is to provide the element type and the number of elements: \verb|array( type, length )| where length can be any integer expression. The choice was made to require the element type, because it allows the type checking to only use a synthesized attribute. For the same reason, an array literal must contain at least one item. Lastly, arrays introduce two variable constructions: the array element accessor and the property accessor. The ANTLR rules can be seen in \cref{arrays-syntax}.\\
The way arrays are declared and defined is contrary to the assignment. While arrays were defined as fixed-size vectors in a previous iteration of the language, this was considered too restrictive in practice.
\begin{figure}
\caption{ANTLR4 code for arrays in Boppi.}
\label{arrays-syntax}
\begin{minted}{antlr}
expr
: ...
| ARRAY PAROPEN type LISTDELIM size=expr PARCLOSE #defineArray
| ARROPEN expr (LISTDELIM expr)* ARRCLOSE #literalArray
type
: ...
| type ARROPEN ARRCLOSE #typeArray
variable
: ...
| variable ARROPEN expr ARRCLOSE #variableArray
| variable PROP IDENTIFIER #variableProperty
\end{minted}
\end{figure}
\paragraph{Examples}
An example of using arrays can be seen in \cref{arrays-example}.
\begin{figure}
\begin{subfigure}{0.6\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
var int[] fibs; fibs := [1,1,2,3,5,8];
var int i; read(i);
while i < fibs.length do
print(fibs[i]);
i := i+1;
od
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.3\textwidth}
\caption{input and output}
\begin{minted}{text}
> 2
<<< 2
<<< 3
<<< 5
<<< 8
\end{minted}
\end{subfigure}
\caption{Example of arrays in Boppi.}
\label{arrays-example}
\end{figure}
\paragraph{Use}
Array variables are not assigned at declaration. As such, the user should heed a warning that a variable may not be assigned, since their value may point anywhere. Moreover, an array may contain undefined elements, which neither the compiler nor the run-time will detect.\\
Array variables have exactly one named property, their \verb|length|. This is always non-negative for assigned arrays and undefined otherwise. All other types up to here have no properties.\\
An array literal may contain any positive number of elements, which must all have the same type. The type of the resulting array is, naturally, an array of the elements' type and the length is equal to the number of expressions in the literal.\\
An array constructor comprises a type, which will be the type of the elements, and a non-negative number of items. Note that the elements will be undefined.\\
An array accessor may only be used on an array type. The result type will be the element type of the array.\\
Arrays can be compared with each other for equality if they have the same element type.\\
Lastly, an array of characters can be printed to standard output and can be read from standard input.\\
Note that, while an array variable may be defined constant, its elements can still be changed.
\paragraph{Semantics}
An array is a finite sequence of items of a single type whose values can be retrieved through a zero-based index. An array literal creates an array exactly large enough to hold all the expressions inside, then evaluates the expressions left-to-right and puts the results in the corresponding array index. An array constructor simply evaluates the requested length and allocates an array of that length, or halts the machine if the length is negative.\\
Assigning an array to a variable means that variable will point to the array from that point. This means an expression like \verb|array1 := array2;| will result in both variables pointing to the same array, so changing an element of \verb|array1| will change the element for \verb|array2|.\\
An array accessor evaluates the index expression and then returns the element at that index, or halts the machine if that index is out of bounds.\\
An equality check between two arrays compares the length and each element of the array. Note that, for nested arrays, this will compare the addresses of the inner arrays, rather than the length and values within those arrays.
\paragraph{Code generation}
The array constructor first evaluates the expression, then generates a check whether the array size is valid and either \verb|halt|s the machine or allocates the array.\\
An array literal generates the allocation of the array, then, for each expression, evaluates it and puts the result in the array using \verb|storeAI r_res => r_array, c_offset|, with \verb|r_res| the result of the expression, \verb|r_array| the base address of the array and \verb|c_offset| the offset of the particular element calculated at compile-time.\\
An array access generates the following steps (illustrated in \cref{arrays-access-snippet}):
\begin{enumerate}
\item Visit the array variable.
\item Load the array's address.
\item Visit the index expression.
\item Load the array's memory size and divide it by the element size.
\item Check whether the calculated index is less than the array's size and not negative. Halt if this is not the case.
\item Multiply the index by the element size to get the offset and add it to the array's base address to get the address of the element.
\end{enumerate}
Retrieving the length of an array requires a few steps because the length is only stored implicitly. The generator first retrieves the array variable. Then it produces a \verb|load r_temp => r_temp| instruction to get the array's base address, followed by a \verb|addI r_temp, OFFSET_OBJECT_SIZE => r_temp| and \verb|load r_temp => r_temp| to retrieve the memory size. Lastly, it produces \verb|divI r_temp, c_element_size => r_temp| to convert the size to the number of elements.
\begin{figure}
\caption{Array access snippet from \cref{arrays-code}.}
\label{arrays-access-snippet}
\begin{minted}{boppi}
addI r_arp,0 => __2 // add offset
load __2 => __2 // get array object
loadI 0 => __3 // 0
loadAI __2,-4 => __1 // check array index
divI __1,4 => __1 // check array index
cmp_LT __3,__1 => __1 // check array index
cmp_GE __3,r_nul => __4 // check array index
and __1,__4 => __4 // check array index
cbr __4 -> nob5,oob4 // check array index
oob4: haltI 1634692962 // array index out of bounds
nob5: multI __3,4 => __3 // multiply index by size
add __2,__3 => __2 // get array index address
\end{minted}
\end{figure}
See \cref{arrays-code} for a simple example of a nested array. Because the generated ILOC spans around 100 lines, it can be found as a separate file.
\begin{figure}
\begin{subfigure}{0.2\textwidth}
\caption{Boppi code}
\begin{minted}{boppi}
var int[][] matrix;
matrix := [
[1,2],
[3,4]
];
print(matrix[0][1]);
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.7\textwidth}
\caption{Generated ILOC}
See \emph{doc/nestedArray.iloc.txt}. Lines 178-183 allocate the top-level array, whereas 184-189 and 196-200 allocate the second-level arrays. Lines 206-215 try to decrement the reference to the current array of \verb|matrix| if assigned, which it isn't. Lines 217-229 increment and decrement the reference count of the outer array because of the assignment expression and the statement ending. Lines 230-253 retrieve the matrix's element (0,1), which is the two.
\end{subfigure}
\caption{Generated code for arrays in Boppi.}
\label{arrays-code}
\end{figure}
\section{Reference types}
This is an addendum to function variables and arrays. Both of these are reference types, meaning that the values of the variables are merely pointers to a heap-allocated segment of data. In order to automatically garbage collect the objects, the compiler needs to change the reference count (\emph{RC}) whenever a reference type is used.\\
When a reference type is used in an expression, its RC is incremented. When the result of an expression is discarded, e.g. at the end of a statement or in an output expression with multiple arguments, a reference type will have its RC decremented. In an assignment with a reference type, first the expression is evaluated, then the RC of the old value of the variable is decremented and finally the RC of the new value is incremented.

View File

@ -1 +1,13 @@
\inputminted{antlr}{\projectroot src/pp/s1184725/boppi/antlr/Boppi.g4}
The ANTLR grammar is split into its lexer rules and parser rules. The lexer rules can be seen in \cref{boppi-lexer} and the parser rules in \cref{boppi-parser}.
\begin{code}
\caption{Lexemes of the Boppi language.}
\label{boppi-lexer}
\inputminted{antlr}{\projectroot src/pp/s1184725/boppi/antlr/BoppiTokens.g4}
\end{code}
\begin{code}
\caption{ANTLR4 parser rules of the Boppi language.}
\label{boppi-parser}
\inputminted{antlr}{\projectroot src/pp/s1184725/boppi/antlr/Boppi.g4}
\end{code}

View File

@ -1,15 +1,71 @@
% Problems and solutions. Summary of problems you encountered and how you solved them (max. two
% pages).
\subsection{Function calls within parameters}
Each function call requires an AR. When new ARs are allocated at a static offset relative to the current AR, this may lead to problems with function calls of the form \verb|f(g(h))| or \verb|f(a, g(h))|: the AR of \verb|f| must not be overwritten during the call to \verb|g|. There are multiple solutions to this problem:
\section{Large expression trees}
Expressions of the form $((a \star b) \star (c \star d)) \star ((e \star f) \star (g \star h))$ that cannot be rearranged to shallower trees (especially if $a..h$ have side effects that influence each other) require much temporary storage. This may be solved in one of three ways:
\begin{enumerate}
\item dynamically allocating the AR (requires a dynamic allocator)
\item generating different offsets for nested function calls (e.g. the AR of \verb|h| in \verb|f(g(h()))| must be allocated at \verb|ARP+f_AR+g_AR|)
\item allocating the AR after evaluating its parameters (requires a temporary storage for the parameters)
\item always push a sub-expression to the stack and pop sub-expressions when the main expression can reduce them (stack machine)
\item increase the number of registers to fit all sub-expression results
\item treat expressions as functions so the implementation of function calls will store the sub-expressions
\end{enumerate}
A combination of solutions is also possible. For example, store results in registers until all registers are in use, then start pushing results to the stack, or always push results to the stack and remove redundant push-pop instructions during an optimization pass.
\paragraph{Solution}
The second solution is chosen for its simplicity, performance and the fact that the ILOC VM supports infinite registers.
\section{Function calls within parameters}
Each function call requires an Activation Record (\emph{AR}). A straightforward way of allocating new ARs is allocation at a static offset relative to the current AR. This may lead to problems with function calls of the form \verb|f(g(h))| or \verb|f(a, g(h))|: the AR of \verb|f| must not be overwritten during the call to \verb|g|.\\
There are multiple solutions to this problem:
\begin{enumerate}
\item dynamically allocating the AR: this requires a dynamic allocator
\item generating different offsets for function calls within the parameter of a function call: e.g. the AR of \verb|h| in \verb|f(g(h()))| must be allocated at \verb|ARP+f_AR+g_AR|
\item allocating the AR \emph{after} evaluating its parameters: this requires a temporary storage for the parameters
\end{enumerate}
The third option was chosen, namely by pushing each parameter value to the stack, then allocating the AR for the outer function call and popping the values into it.
\paragraph{Solution}
Initially, the third option was chosen for its ease of implementation. This was implemented by first pushing each parameter value to the stack after evaluation. Then the AR for the outer function call was allocated and the parameter values were popped into the AR.\\
\subsection{Register saves}
When calling a function, some registers must be saved by the caller or callee in order to not lose their value when executing the callee. The choice was made to use caller-saves and to keep track of all the registers that are required for future instructions.
Later, with the implementation of a dynamic allocator, the first option was chosen. The AR is now dynamically allocated and a reference to the AR is saved in a (reserved) register, with each parameter being stored in the AR immediately after evaluating.
\section{Register saves}
When calling a function, some registers must be saved in order to not overwrite and lose their value when executing the function. This can be done either by the calling function (\emph{caller-saves}) or the called function (\emph{callee-saves}).
\paragraph{Solution}
The choice was made to use caller-saves. Moreover, the generator keeps track of registers that are in use in order to keep the number of saved registers to a minimum.\\
Registers are saved by pushing them on the stack before a call and popping them from the stack after a call. There is no dedicated register save area in the AR.
\section{Closures (for function passing)}
\label{problem-closures}
Fully correct function passing (i.e. closures) presents three problems:
\begin{enumerate}
\item keeping track of functions being passed around: the context of a function must be preserved, i.e. the parent ARs, in order to access non-local variables.
\item keeping track of active ARs: an AR must only be deallocated when no function can access it anymore.
\item allocating ARs: since an AR may remain allocated after the function call has ended, ARs cannot be simply pushed to and popped from the stack.
\end{enumerate}
One way to keep track of the parent AR of a function, is to add an extra field for function values, turning function types into a tuple \verb|(call address,parent AR)| reference instead of a direct call address value.\\
One way to decide which ARs must be kept and which can be deallocated, is to have a reference count (\emph{RC}) for each AR. When a function reference is used, all its parent ARs have their RC incremented. When a function reference is discarded, all its parent ARs have their RC decremented.\\
Another way using RCs is to only change the RC of the direct parent AR. When a function reference is used, the direct parent's AR has its RC incremented. When a function reference is discarded, the parent AR has its RC decremented. However, if the parent AR is freed in the process, it must decrement the RC of \emph{its} parent and so on. See \cref{future-work} for more discussion about deallocating reference types.\\
\paragraph{Solutions}
To address the dynamic allocation, a simple allocator with reference counting was made, see \nameref{memlib}.\\
Function data becomes a tuple \verb|(call address, parent AR)|. This may be extended in the future to \verb|(call address, AR)| for continuations.\\
Keeping track of active ARs is solved by incrementing and decrementing the whole chain of parent ARs. This solution was chosen because of its simplicity.
\section{Local data size for reassigned functions}
\label{problems-reassigned-closures}
Neither the type, scope nor entry point of a function specify the local data size of a function. This poses a problem when a function call is made to an assigned/reassigned function.
This can be solved in two ways:
\begin{enumerate}
\item resize the AR during the prologue of a call
\item store the required size of the AR in the function reference
\end{enumerate}
\paragraph{Solution}
The second solution was chosen because it was easier to implement.

View File

@ -2,3 +2,163 @@
% table management, type checking, code generation, error handling, etc. In your description, rely
% on the concepts and terminology you learned during the course, such as synthesised and inherited
% attributes, tree listeners and visitors.
The compiler chain is written in Java mostly, with a preamble (\emph{memlib}) written in ILOC. The following sections describe the Java classes and the ILOC preamble.
\section{Toolchain helper}
The toolchain helper \emph{pp.s1184725.boppi.ToolChain} contains various helper methods for compiling and executing programs. Notably, a \verb|Logger| object is required for nearly all methods. This way warnings and errors can be reported instead of throwing exceptions or failing silently.\\
Moreover, the helper contains a method to print the abstract syntax tree (\emph{AST}) of a Boppi program as a graphviz graph. The AST can be produced at any point in the compilation process. After the checking phase and the generating phase the AST will be annotated with types, variables and registers used.\\
The helper also provides a method to modify a \verb|Logger| object to append logged items to a list instead of the standard output. This can be useful for collecting problems and displaying them in a window or file and for test automation.\\
\section{Checker}
The correctness checker \emph{pp.s1184725.boppi.BoppiChecker} performs type checking, binding identifiers to variables, checking constants are assigned once and checking variables are assigned before being used. This is done on a bare parse tree of a Boppi program.\\
The checker is implemented as a tree visitor, since this allows state to be changed between visiting different children of a node. This is advantageous for e.g. the if-then expression in which a scope has to be opened between the test and the conditional code (see \cref{conditionals}).\\
The only inherited attributes during checking are the booleans \verb|inLhs| and \verb|inType|. These are implemented as local variables rather than rule attributes. \verb|inLhs| tracks whether a variable is being assigned or is used in an expression. This information is used to decide whether a constant is assigned a value twice and whether a variable is used before being initialized. \verb|inType| tracks whether a variable is used in a type-level expression, in which it may be used regardless of whether it is initialized.\\
The synthesised attributes during checking are the type of a node (\verb|Annotations::types|) and, when applicable, the variable belonging to an identifier (\verb|Annotations::variables|) and the local variables of a function (\verb|Annotations::function|). The latter are only used in the generating phase.\\
The checker tries to check a whole program best-effort. When a problem is encountered, e.g. an illegal redefinition of a variable, the problem is reported and the expression is ignored when possible. When ignoring is not an option, e.g. using an undefined variable in an expression, the type is set to \verb|void| and a chain of errors may be reported.\\
All errors and warnings are reported to a \verb|Logger| that is provided to the checker.
\section{Generator}
The machine code generator \emph{pp.s1184725.boppi.BoppiGenerator} builds an ILOC program from a checked and annotated parse tree. Like the checker, it is implemented as a tree visitor. This gives fine-grained control over the order in which instructions are generated and may lead to fewer jumps and registers. The generator only publicly exposes a static method for generating a program, because of the statefulness of a generator object.\\
The generator passes the result register as a synthesised attribute. The only other attributes are global and include the \verb|Annotation|s, \verb|RegisterPool| and the \verb|Program| produced so far.\\
While building a program, the generator reserves and uses registers drawn from a \verb|RegisterPool|. For example, in a sum, first the left operand is evaluated, then the result register is blocked, then the right operand is visited, then the left result register is freed and finally the addition is generated using both results. This is illustrated in \cref{generator-sum}.\\
\begin{figure}
\begin{subfigure}{0.7\textwidth}
\begin{minted}{java}
@Override
public Reg visitInfix2(Infix2Context ctx) {
Reg lhs = visit(ctx.lhs);
Reg rhs = regPool.blockReg(lhs, () -> visit(ctx.rhs));
emit(ctx.getChild(1).getText(), ops.get(ctx.op.getType()), lhs, rhs, lhs);
return lhs;
}
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.2\textwidth}
\begin{minted}{boppi}
loadI 32 => r
loadI 10 => g
add r, g => r
\end{minted}
\end{subfigure}
\caption{Java code for generating an addition expression and code example of 32+10.}
\label{generator-sum}
\end{figure}
The generator has a number of helper methods to generate calls to \emph{memlib} functions. These methods take a number of registers and produce a sequence of ILOC instructions. Moreover, it has a few helper methods to increment and decrement AR references because of \cref{problem-closures}.\\
The generator has a few scenarios for which it produces \verb|haltI| instructions with the appropriate \verb|ERROR_x| value. They may either be generated due to forcing an incorrect program to compile or due to a runtime error. Currently negative array sizes and array out-of-bounds errors are the only runtime errors that lead to a \verb|haltI| instruction.\\
Lastly, the generator prepends a program with \emph{memlib} and \emph{stdlib} to have access to basic functions.
\section{Symbol table}
The symbol table \emph{pp.s1184725.boppi.CachingSymbolTable} keeps track of existing symbols (variables) while the checker traverses a Boppi program. It is generic for the type system. The symbol table also manages lexical scope objects (\emph{pp.s1184725.boppi.FunctionScope}) to decide variable offsets and local data sizes.\\
The symbol table has three methods for variable symbols that act analogously to dictionary operations: \verb|get|, \verb|put| and \verb|has|. Furthermore, there are six methods for opening and closing scopes, of which two are ``safe'' as they both open and close a scope. \verb|withFunctionScope| opens a lexical scope for a function, runs the provided function and then closes the scope. \verb|withScope| also opens a scope, runs the provided function and closes the scope; however, its variables will be produced by an enclosing function scope.\\
For example, in \cref{symbol-table-scopes} x, y, nested x and z are all given offsets in the same \verb|FunctionScope|. However, the nested x and z are defined in a deeper lexical scope, so they only exist within those scopes and their name may override a variable name in the same function scope (but a higher lexical scope). Moreover, since nested x and z are in unrelated scopes, they may have the same offset in the function.
\begin{figure}
\caption{Scope example in Boppi.}
\label{symbol-table-scopes}
\begin{minted}{boppi}
function main() {
var int x;
var int y;
{
var int x;
x := 1;
};
{
var int z;
z := 1;
};
}
\end{minted}
\end{figure}
\section{FunctionScope}
The lexical scope class \emph{pp.s1184725.boppi.FunctionScope} contains local variables within a function. An object is created with a given lexical depth, which can be retrieved at any time.\\
The \verb|FunctionScope::addVariable| method produces a variable of the provided type at the FunctionScope's lexical depth and current offset. This variable is both recorded in the object and returned. This method is used by the symbol table to produce a variable for each symbol.\\
The generator uses the function scope to determine how large the local data size for a function has to be and to allocate and deallocate objects where applicable.
\section{memlib}
\label{memlib}
\emph{memlib} is the memory allocator module for Boppi programs. It maintains a list of free blocks on the heap in between allocated objects, akin to the C dynamic memory allocator (\emph{malloc}, \emph{free}). \emph{memlib} currently has three subroutines: \verb|memalloc|, \verb|memaddref| and \verb|memfree|. \verb|memalloc| allocates a piece of free memory for an object and sets the reference count to 1. \verb|memaddref| increments the reference count of an object by 1. \verb|memfree| decrements the reference count by 1 and deallocates the object if the count goes to zero. To improve the correctness of programs, allocated memory is always zeroed.\\
Blocks of objects and free space both have a header of eight bytes (\verb|2*INT_SIZE|) and always contain the size of the block excluding the header. Additionally, free blocks have a pointer to the next free block, whereas objects have a reference count. See \cref{memlib-objects} for the general structure of allocated and free blocks.\\
At the start of a program, \emph{memlib} builds one block of \verb|brk-3*INT_SIZE| of free space, where \verb|brk| is a special purpose register pointing to the end of the heap space. The free block starts at address \verb|3*INT_SIZE|, excluding the header, and is pointed at by address zero, as can be seen in \cref{memlib-start}. After some allocations and deallocations, the memory may end up looking like \cref{memlib-runtime}.\\
When calling \verb|memalloc|, the algorithm iterates over the free blocks in search of a block of exactly the right size or a size of at least 16 bytes (\verb|4*INT_SIZE|) more. The algorithm keeps track of the last \emph{next free block} pointer to reassign it when a suitable free block is found. If the current free block has exactly the right size, the previous free block is linked with the next block and the current block is turned into the object. If a suitable free block is larger, the object is allocated at the start of this space and a new free block is created at the end of this space and the free blocks are linked again.\\
For example, when allocating a single object of 8 bytes at the beginning of a program, the object and its header are allocated at bytes 4-20, while the free block is moved and resized to bytes 20-80. Compare \cref{memlib-start} and \cref{memlib-onealloc}.\\
The address of the first data byte of the object is returned, or \emph{null} if no space could be found or the requested object has a size of \verb|0|.\\
This routine runs in $O(n)$ for the number of free blocks, i.e. the amount of fragmentation in the memory.\\
The \verb|memaddref| routine simply increments the reference count of an object. It does not check whether the address is part of a free block or aligned to the start of an object.\\
This routine runs in $O(1)$.\\
The \verb|memfree| routine decrements the reference count of an object. If the counter goes to zero, the object will be deallocated. During deallocation, the algorithm iterates over the free blocks to find blocks immediately adjacent to the object, if any. If a free block is present immediately in front of the object, the free block is resized to cover the object too, otherwise the object is turned into a free block. If a second free block is present immediately following this free block, the current block is resized to cover the second free block.\\
This routine runs in $O(n)$ for the number of free blocks, i.e. the amount of fragmentation in the memory.\\
\subsection{Calling convention}
\label{memlib-calling}
The calling convention for \emph{memlib} and other internal subroutines consists of first pushing the return address onto the stack. Any arguments must then be pushed onto the stack in order, followed by a jump to the start of the subroutine. The subroutine must consume the arguments and the return address. If a subroutine has a return value, it must be pushed onto the stack.\\
For example, \emph{memalloc} requires one argument, the requested size, and returns the allocated address. Firstly, the return address is pushed, which is either loaded from a label or calculated from the current address. Secondly, the size is pushed to the stack. Thirdly, the jump to \verb|memalloc| is made and the subroutine is executed. After execution of \verb|memalloc|, the object's location is popped from the stack.
\begin{figure}
\includegraphics{memlib-objects}
\caption{Structure of objects and free blocks with \emph{memlib}. A free block either points to the next free block or to \emph{null}.}
\label{memlib-objects}
\end{figure}
\begin{figure}
\includegraphics[width=\textwidth]{memlib-start}
\caption{Structure of the memory after initialisation by \emph{memlib}.}
\label{memlib-start}
\end{figure}
\begin{figure}
\includegraphics[width=\textwidth]{memlib-onealloc}
\caption{Structure of the memory after allocating a single object of size 8 with \emph{memlib}.}
\label{memlib-onealloc}
\end{figure}
\begin{figure}
\includegraphics[width=\textwidth]{memlib-runtime}
\caption{Structure of the memory after allocating objects of sizes 8, 4 and 12 respectively and deallocating the object of size 4.}
\label{memlib-runtime}
\end{figure}

View File

@ -1,13 +1,15 @@
% Summary of the main features of your programming language (max. 1 page)
This report describes the programming language and implementation \emph{Boppi}. First a summary of the language is provided, listing the kinds of features in the language. Next is a description of language features including examples and semantics. This is followed by a description of Java classes used to compile the language and a listing of changes to the ILOC virtual machine. Then the obstacles encountered during development and solutions to them are covered. Lastly, the test programs are described, followed by concluding words about the project.
\section{Language overview}
As per the project description, the language description is written in \emph{ANTLR 4}, leveraging its support for direct left-recursion and far lookahead. The compiler chain is written in Java and compiles Boppi programs to the \emph{ILOC} virtual machine language. ILOC was chosen for its simple instruction set and ability to extend and inspect the virtual machine.\\
Libraries for dynamic allocation and for basic functions were written in ILOC.\\
Boppi includes the following features:
\begin{enumerate}
\item basic expressions
\item conditionals
\item variables
\item functions
\end{enumerate}
Conditionals comprise constructions equivalent to \verb|if|, \verb|if else| and \verb|while|.
Variables have a lifetime, namely the scope (between \verb|{ }| or within conditionals and functions) in which they are declared. Variables can be used as type identifiers for other variables.
Function references can be passed around in variables, but they may not work correctly when run in a different scope.
\begin{description}
\item[basic expressions] comprise arithmetic with integers, comparisons with integers, characters and booleans.
\item[conditionals] comprise constructions equivalent to \verb|if|, \verb|if else| and \verb|while|.
\item[variables (and constants)] names that have an explicit type and that can be assigned a value. Moreover these have a scope (between \verb|{ }| or within conditionals and functions) in which they can be used.
\item[functions] pieces of code that can be called from somewhere else. These pieces may take parameters and return a value. Functions in Boppi can be saved in variables. Moreover, variables declared within a function can outlive a call to the function, thereby creating a \emph{closure}.
\item[arrays] a sequence of values of a certain type. Arrays in Boppi are created at run-time and may have any non-void type.
\end{description}

View File

@ -1,3 +1,85 @@
% Extended test program. The listing of one (correct) extended test program, as well as the generated
% target code for that program and one or more example executions showing the correct functioning of
% the generated code.
As an example of a test program, we will look at a memoizing recursive Fibonacci program. This program is well-suited because it contains I/O, variables, loops, functions, arrays, function references and closures. See \cref{test-example} for the source code and the text file \verb|doc/fibonacciRecursiveExample.iloc| for the generated ILOC. The compiled form is quite long, mostly because it heavily increments and decrements reference counts and does so in-line and without optimizations.\\
The program works by repeatedly asking for a number. If the user provides a positive number, it returns the value of the Fibonacci sequence at that position. If the user provides a zero or negative number, the program terminates. See \cref{test-example-runs} for a few runs of the program.
\begin{figure}
\caption{Source code of \emph{fibonacciRecursive.boppi}}
\label{test-example}
\begin{minted}{boppi}
function (int)->int memoizedFib() {
var int[] memo;
memo := array(int, 50);
function int fib(int n) {
if n < 1 || n > 46 then
0
else
if n < 2 then
1
else
if memo[n] > 0 then
memo[n]
else
memo[n] := fib(n-1)+fib(n-2)
fi
fi
fi
};
fib
};
var (int)->int myFib;
myFib := memoizedFib();
var int n;
while read(n) > 0 do
print(myFib(n))
od;
\end{minted}
\end{figure}
\begin{figure}
\begin{subfigure}{0.2\textwidth}
\begin{minted}{boppi}
> 1
<<< 1
> 0
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.2\textwidth}
\begin{minted}{boppi}
> -5
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.2\textwidth}
\begin{minted}{boppi}
> 3
<<< 2
> 4
<<< 3
> 5
<<< 5
> 6
<<< 8
> 0
\end{minted}
\end{subfigure}
\hfill
\begin{subfigure}{0.2\textwidth}
\begin{minted}{boppi}
> 46
<<< 1836311903
> 47
<<< 0
> -1337
\end{minted}
\end{subfigure}
\caption{Examples of input and output on \emph{fibonacciRecursive}.}
\label{test-example-runs}
\end{figure}

View File

@ -4,5 +4,98 @@
% syntactic, semantic or run-time errors.
% All tests should be provided as part of the zip-file. One test program should be included as an appendix
% in the report (see below).
Testing the Boppi language is done with fully automated ANTLR4 tests. The test suite is designed to quickly add new syntactic, semantic and runtime tests and check them for errors. For this purpose, the \emph{pp.s1184725.boppi.test.BoppiTests} class contains various helper methods that capture errors and pass input and output.\\
Each feature of the Boppi language is tested for correctness with a set of automated tests. Each set of tests checks for both correct and incorrect syntax, semantics and runtime evaluation. Moreover, the dynamic allocator (see \cref{memlib}) is tested for correctness in a separate test suite.\\
\inputminted{java}{\projectroot src/pp/s1184725/boppi/test/AllTests.java}
\section{Basic expressions}
The basic expressions are tested for:
\begin{itemize}
\item correctly parsing a valid program
\item reporting errors for programs with invalid lexemes and invalid expressions
\item correctly checking a valid program
\item reporting errors for programs with type errors
\item correctly executing a valid program and printing the results of expressions
\item reporting a warning for expressions that require more than the recommended number of registers
\item crashing the virtual machine when a division by zero is executed
\end{itemize}
\section{Variables}
Simple variables are tested for:
\begin{itemize}
\item correctly parsing variable names and linking them to a single object
\item rejecting invalid variable names
\item correctly matching a variable use to a declared variable
\item reporting an error when a variable is used without being declared before
\item correctly redeclaring variables in a deeper scope
\item reporting an error if a variable is out of scope
\item reporting an error if a block ends with a declaration
\item reporting an error if a constant variable is assigned more than once (statically checked)
\item reporting an error if a variable is used before it is assigned (statically checked)
\end{itemize}
\section{Conditional code}
Conditionals are tested for:
\begin{itemize}
\item correctly parsing if expressions, while statements and if statements with and without an alternative
\item correctly returning the inner type of an if expression
\item correctly returning void of an if or while statement
\item reporting an error if the condition in a conditional does not return a boolean type
\item correctly opening a scope for the variables in the condition and in the bodies of a conditional
\item correctly taking the right branch in an if statement or expression
\item correctly iterating a while loop as long as the condition is true
\end{itemize}
\section{Functions}
Functions are tested for:
\begin{itemize}
\item correctly parsing functions with and without a return type
\item correctly parsing functions with zero, one or multiple typed parameters
\item correctly parsing function calls
\item reporting an error when a block ends with a function declaration
\item correctly matching a name to a declared function
\item correctly matching the return type of a function to the type of the function body, if applicable
\item correctly matching the actual parameter types of a function call to the formal parameter types
\item correctly setting the return type of a function call to the function definition's return type
\item reporting an error if the number of actual parameters in a function call does not match the number of formal parameters
\item correctly opening a scope for parameters
\item correctly checking and running recursive functions
\item correctly declaring functions inside functions
\item correctly assigning and reassigning function variables
\end{itemize}
\section{Closures}
Having functions as first-class citizens (i.e. function references can be passed around) calls for the possibility of closures: a context for a function with all the non-local variables in case the function requires them when run.\\
Closures are tested for:
\begin{itemize}
\item correctly parsing function type declarations
\item correctly passing function variables as parameters and return values
\item correctly using non-local variables when a function is called outside its declared context
\item correctly using the same non-local variables across multiple function calls with the same context
\end{itemize}
The tests for garbage collection are skipped, due to the way activation records (\emph{AR}) are deallocated. See \cref{future-work} for more information.
\section{Arrays}
Arrays are tested for:
\begin{itemize}
\item correctly parsing array types and nested arrays
\item correctly parsing array accessors
\item correctly parsing variable properties (\emph{array.length})
\item correctly checking array literals
\item correctly checking array constructors and assigning them to arrays
\item correctly returning the element type of an array access
\item correctly performing a bounds check on an array access during run-time
\item correctly performing equality checks between arrays
\end{itemize}
\section{Allocator}
The \verb|memlib| library is tested for:
\begin{itemize}
\item correctly allocating objects
\item correctly deallocating an object when its reference count goes to zero
\item correctly incrementing and decrementing reference counts
\item correctly merging freed space
\item correctly allocating objects in fitting free slots
\item halting the machine when a program tries to free empty space
\item halting the machine when a null pointer has its reference count incremented or decremented
\end{itemize}

View File

@ -1,2 +1,14 @@
% All ANTLR tree walkers (listeners and visitors). The complete listing of each implementation of a
% tree listener or tree visitor for your grammar.
The Boppi compiler chain has two stages after parsing: annotation and generation. The first stage performs type checking, variable and function linking, variable offsets and variable assignments. This can be seen in \cref{boppi-checker}. The second stage performs register assignment and code generation, as can be seen in \cref{boppi-generator}.
\begin{code}
\caption{Type checker and annotator for the Boppi compiler.}
\label{boppi-checker}
\inputminted{java}{\projectroot src/pp/s1184725/boppi/BoppiChecker.java}
\end{code}
\begin{code}
\caption{Code generator for the Boppi compiler.}
\label{boppi-generator}
\inputminted{java}{\projectroot src/pp/s1184725/boppi/BoppiGenerator.java}
\end{code}

View File

@ -27,8 +27,6 @@
\tableofcontents
\newpage
\listoftodos[TODO]
\chapter{Introduction}
\input{report-summary}
@ -59,7 +57,7 @@
\chapter{ANTLR walkers}
\input{report-walker}
\chapter{Test programs}
\chapter{Test program}
\input{report-test-program}
\end{document}

20
doc/struct-memory.dot Normal file
View File

@ -0,0 +1,20 @@
// Figure source: one activation record (AR) drawn as a single record-shaped node.
digraph {
// lay the graph out right-to-left
rankdir=RL
node[shape=record]
// fields of the record; <arp>, <carp> and <al> are ports used as edge endpoints below
ar[label="...
|Local variables
|...
|<arp>Parameters
|<carp>Caller's ARP
|Return address
|Return value
|<al>Access link
"]
// the "Caller's ARP" and "Access link" fields each point to a point-shaped node (null1 / null2)
ar:carp->{null1[shape=point]}
ar:al->{null2[shape=point]}
// borderless "ARP" text node pointing at the Parameters field
{arp[shape=none, label="ARP"]}->ar:arp
}

View File

@ -0,0 +1,24 @@
// Figure source: two stack tables, one labelled "memalloc" and one labelled "result",
// each with an "sp" node pointing into it.
digraph {
{
// keep both sp nodes and both tables on the same rank
rank=same
sp1[label="sp"]
sp2[label="sp"]
// stack table for the call: object size and return address (4 and 351 are the values shown in the figure)
call[label=<<table>
<tr><td colspan="2"><i>stack<br>...</i></td></tr>
<tr><td>object size</td><td>4</td></tr>
<tr><td port="stack">return address</td><td>351</td></tr>
<tr><td colspan="2"><i>...</i></td></tr>
</table>>, shape=plaintext, xlabel="memalloc"]
// stack table for the result: the object address (56 is the value shown in the figure)
return[label=<<table>
<tr><td colspan="2"><i>stack<br>...</i></td></tr>
<tr><td port="stack">object address</td><td>56</td></tr>
<tr><td colspan="2"><i>...</i></td></tr>
</table>>, shape=plaintext, xlabel="result"]
}
// each sp node points at the table cell that carries port "stack"
sp1->call:stack
sp2->return:stack
}

129
doc/successor.iloc.txt Normal file
View File

@ -0,0 +1,129 @@
170 loadI 0 => r_nul // initialise zero register
171 loadI 176 => r_arp // malloc - load return address (176)
172 push r_arp // malloc - push return address
173 loadI 24 => r_arp // malloc - load requested object size (24)
174 push r_arp // malloc - push requested object size
175 jumpI -> memalloc // malloc - call memalloc
176 pop => r_arp // malloc - pop address of the allocated object
177 addI r_arp,16 => r_arp // construct main AR - ARP is placed 16 past the object start (slots at ARP-16..ARP-4 precede it)
178 jumpI -> s0 // define successor - jump over body
179 nop // define successor - entry point
180 addI r_arp,0 => __1 // add offset - address of the slot at ARP+0 (param 0 is stored there by the caller)
181 load __1 => __1 // load the value stored at that address
182 loadI 1 => __2 // constant 1
183 add __1,__2 => __1 // + (loaded value plus 1)
184 storeAI __1 => r_arp,-12 // define successor - move result to ARP-12 (read back by the caller as the result)
185 loadAI r_arp,-8 => __2 // load ref count - NOTE(review): ARP-8 is the slot read as the return address at 191; confirm the intended offset
186 loadI 1 => __1 // one
187 cmp_LE __2,__1 => __2 // true when the loaded count is <= 1 (i.e. not more than one ref)
188 cbr __2 -> ycl1,ncl2 // remove vars if last reference
189 ycl1: nop // cleanup target (no cleanup instructions follow)
190 ncl2: nop // no cleanup target
191 loadAI r_arp,-8 => __1 // define successor - load return address
192 jump -> __1 // define successor - go to return address
193 s0: nop // define successor - skip target
194 loadI 199 => __1 // malloc
195 push __1 // malloc
196 loadI 12 => __1 // malloc
197 push __1 // malloc
198 jumpI -> memalloc // malloc
199 pop => __1 // malloc
200 loadI 179 => __2 // define successor - load target address
201 storeAI __2 => __1,0 // define successor - set target address
202 storeAI r_arp => __1,4 // define successor - copy ARP
203 loadI 20 => __2 // define successor - load AR size
204 storeAI __2 => __1,8 // define successor - set AR size
205 storeAI __1 => r_arp,0 // define successor - set function reference
206 i2i r_arp => ART // AR incRef
207 cmp_NE ART,r_nul => __1 // AR incRef
208 cbr __1 -> aril3,arid4 // AR incRef
209 aril3: loadI 214 => __1 // AR incRef
210 push __1 // AR incRef
211 subI ART,16 => __1 // AR incRef
212 push __1 // AR incRef
213 jumpI -> memaddref // AR incRef
214 loadAI ART,-16 => ART // AR incRef
215 cmp_NE ART,r_nul => __1 // AR incRef
216 cbr __1 -> aril3,arid4 // AR incRef
217 arid4: nop // AR incRef
218 addI r_arp,4 => __1 // add offset
219 in "" => __1 //
220 addI r_arp,4 => __2 // add offset
221 store __1 => __2 // save to var x
222 addI r_arp,0 => __3 // add offset
223 load __3 => __2 // call successor - load function reference
224 loadAI __2,8 => __2 // call successor - load AR size
225 loadI 229 => __1 // malloc
226 push __1 // malloc
227 push __2 // malloc
228 jumpI -> memalloc // malloc
229 pop => __1 // malloc
230 addI __1,16 => __1 // call successor - shift AR
231 addI r_arp,4 => __2 // add offset
232 load __2 => __2 // load address
233 storeAI __2 => __1,0 // call successor - store param 0
234 addI r_arp,0 => __4 // add offset
235 load __4 => __2 // call successor - load function reference
236 storeAI r_arp => __1,-4 // call successor - link caller ARP
237 loadAI __2,4 => __3 // call successor - load AL
238 storeAI __3 => __1,-16 // call successor - link AL
239 loadAI __1,-16 => ART // add ref for callee's AL
240 i2i ART => ART // AR incRef
241 cmp_NE ART,r_nul => __3 // AR incRef
242 cbr __3 -> aril5,arid6 // AR incRef
243 aril5: loadI 248 => __3 // AR incRef
244 push __3 // AR incRef
245 subI ART,16 => __3 // AR incRef
246 push __3 // AR incRef
247 jumpI -> memaddref // AR incRef
248 loadAI ART,-16 => ART // AR incRef
249 cmp_NE ART,r_nul => __3 // AR incRef
250 cbr __3 -> aril5,arid6 // AR incRef
251 arid6: nop // AR incRef
252 loadI 257 => __3 // call successor - load return address
253 storeAI __3 => __1,-8 // call successor - set return address
254 i2i __1 => r_arp // call successor - move ARP
255 loadAI __2,0 => __3 // call successor - load target address
256 jump -> __3 // call successor - execute
257 i2i r_arp => ART // AR decRef
258 cmp_NE ART,r_nul => __2 // AR decRef
259 cbr __2 -> ardl7,ardd8 // AR decRef
260 ardl7: loadI 265 => __2 // AR decRef
261 push __2 // AR decRef
262 subI ART,16 => __2 // AR decRef
263 push __2 // AR decRef
264 jumpI -> memfree // AR decRef
265 loadAI ART,-16 => ART // AR decRef
266 cmp_NE ART,r_nul => __2 // AR decRef
267 cbr __2 -> ardl7,ardd8 // AR decRef
268 ardd8: nop // AR decRef
269 loadAI r_arp,-12 => __1 // call successor - load result
270 loadAI r_arp,-4 => r_arp // call successor - reset ARP
271 out "",__1 //
272 loadAI r_arp,0 => __3 // remove reference get var
273 cmp_EQ __3,r_nul => __2 // remove reference
274 cbr __2 -> ynul9,nnul10 // remove reference
275 nnul10: nop // remove reference
276 loadI 280 => __2 // free
277 push __2 // free
278 push __3 // free
279 jumpI -> memfree // free
280 loadAI __3,4 => __3 // remove reference
281 i2i __3 => ART // AR decRef
282 cmp_NE ART,r_nul => __2 // AR decRef
283 cbr __2 -> ardl11,ardd12 // AR decRef
284 ardl11: loadI 289 => __2 // AR decRef
285 push __2 // AR decRef
286 subI ART,16 => __2 // AR decRef
287 push __2 // AR decRef
288 jumpI -> memfree // AR decRef
289 loadAI ART,-16 => ART // AR decRef
290 cmp_NE ART,r_nul => __2 // AR decRef
291 cbr __2 -> ardl11,ardd12 // AR decRef
292 ardd12: nop // AR decRef
293 ynul9: nop // remove reference
294 subI r_arp,16 => r_arp // deconstruct main AR
295 loadI 299 => __3 // free
296 push __3 // free
297 push r_arp // free
298 jumpI -> memfree // free