From 2c2fa4963cb4faae0ee81035c90519c95d715427 Mon Sep 17 00:00:00 2001
From: David Brown <dave@brownsmeet.com>
Date: Sun, 10 Jul 2016 14:21:49 +0100
Subject: [PATCH] Link to build status that can access logs, add date, time and
 branch to status.

---
 doc/BasicTypeSize.md           | 172 +++++++++++++++++++++++++++++++++
 src/tools/make/buildall.pl     |  57 ++++++-----
 src/tools/make/checksumtest.sh |  23 -----
 3 files changed, 205 insertions(+), 47 deletions(-)
 create mode 100644 doc/BasicTypeSize.md
 delete mode 100644 src/tools/make/checksumtest.sh

diff --git a/doc/BasicTypeSize.md b/doc/BasicTypeSize.md
new file mode 100644
index 00000000..ab3364fb
--- /dev/null
+++ b/doc/BasicTypeSize.md
@@ -0,0 +1,172 @@
+## Cross Platform Compatibility and Basic Type Sizes in Vishap Oberon
+
+###### Abstract
+
+Vishap Oberon needs to support 32 and 64 bit machine architectures. 16 and
+possibly 8 bits would be good too.
+
+Currently Vishap Oberon has different INTEGER, LONGINT and SET sizes on 16
+and 32 bit architectures. While this enables memory management code to use
+LONGINT on all architectures, it breaks library and user code which makes
+assumptions about type sizes.
+
+The goal is to specify changes to the Vishap compiler and library to allow C
+code generation for multiple machine architectures without breaking existing
+code, and to allow serialized data to be interchangeable between machine
+architectures.
+
+###### Motivation
+
+Current type sizes are loosely specified and vary between implementations. There
+are conflicting general assumptions, for example: that LONGINT is large enough
+to contain any machine address; but also that LONGINT always takes 32 bits when
+serialised to files. (See Oakwood guidelines appendix A 1.2.5.4.)
+
+The compiler has ended up with a number of INTEGER types, each with its own
+set of code to handle declaration, access, storage etc. There is a good
+opportunity to refactor and simplify the current duplicated code.
+
+Neither C's basic types, nor Oberon's are fixed in size. Yet for cross platform
+compatibility we need fixed size types.
+
+###### Basis of implementation - integers and sets
+
+In the generated C code we use these types for all integer and set variables:
+
+| Unsigned    | Signed      | Sets   |
+| ----------- | ----------- | ------ |
+| INTEGER_U8  | INTEGER_S8  | SET_8  |
+| INTEGER_U16 | INTEGER_S16 | SET_16 |
+| INTEGER_U32 | INTEGER_S32 | SET_32 |
+| INTEGER_U64 | INTEGER_S64 | SET_64 |
+
+SYSTEM.H uses conditional compilation to derive these types from the types
+available in the C compiler we are building on.
+
+Then, with suitable compiler options we control the mapping of compiler types
+to these C types.
+
+There are three strategies that clients may wish to use:
+
+1) Emphasizing compatibility with serialised data and existing code. Here
+   we fix Oberon type sizes across platforms, and introduce a new LONG64 type
+   as follows:
+
+   | Oberon type | Size             |
+   | ----------- | ---------------- |
+   | BOOLEAN     | 8  bits          |
+   | BYTE        | 8  bits unsigned |
+   | SHORTINT    | 8  bits signed   |
+   | INTEGER     | 16 bits signed   |
+   | LONGINT     | 32 bits signed   |
+   | SET         | 32 flag bits     |  
+   | LONG64      | 64 bits signed   |
+
+   This gives a set of sizes that are available on all platforms (even SDCC
+   supports 64 bit ints), and which have fixed characteristics (e.g. the size of
+   a character array sufficient to support any LONGINT value is fixed.)
+
+   Note that these sizes match current Vishap Oberon behaviour on x86.
+
+2) Emphasizing performant maxima. Here we make e.g. LONGINT the largest
+   efficient size available. On x86 we stick with the sizes as above, but for
+   x64 we make changes to INTEGER, LONGINT and SET as follows:
+
+   | Oberon type | Size on x64      |
+   | ----------- | ---------------- |
+   | INTEGER     | 32 bits signed   |
+   | LONGINT     | 64 bits signed   |
+   | SET         | 64 flag bits     |  
+
+3) Supporting system code, especially memory management.
+
+   With SYSTEM imported, we extend the parsing of type INTEGER to accept a
+   subsequent qualifier which may be U8, S8, U16, S16, U32, S32, U64, S64 or
+   ADDRESS.
+
+   Thus the type `INTEGER ADDRESS` takes over the role of `LONGINT` in existing
+   memory management code. The compiler will map `INTEGER ADDRESS` to the
+   relevant `INTEGER_U32` or `INTEGER_U64` in generated C code.
+
+   Additionally the fixed size qualifiers U8, S8, U16, etc. allow the writing of
+   Oberon source code that generates the same C code regardless of compilation
+   options used.
+
+###### Cross platform libraries
+
+Many integral input parameters are currently coded as LONGINT with the intention
+of accepting any size of integer. E.g. Texts.WriteInt. All such code needs
+upgrading to accept LONG64 with implementation changes where necessary to
+account for the larger values. Boring, but straightforward.
+
+Some integral output parameters are currently coded as `VAR LONGINT`, for example
+the integer value field `i` in RECORD type `Scanner`. This is a problem:
+
+Assuming scenario 1 - LONGINT is always 32 bits.
+
+  - If retained as LONGINT, Scanner won't be able to handle 64 bit integers.
+  - If changed to LONG64, existing code will compile with type compatibility
+    errors.
+
+So neither option is possible on its own.
+
+The simplest workaround is to add a new field `l` and a new scanner class
+Long64 (similar to the pair of REAL and LONGREAL values already in Scanner).
+
+Existing code will continue to work with values in the 32 bit range (which is
+OK, because that's all the existing code can generate). New code can allow for
+the LongReal case.
+
+(Ugly but workable).
+
+Oakwood says that INTEGER must be stored as 2 bytes little endian, so Files.Mod
+must use 16 bits on file for Files.ReadInt and Files.WriteInt. So what happens
+in scenario 2 above? Since INTEGER is 32 bits in scenario 2, it would be
+necessary to call Files.WriteLInt and Files.ReadLInt. This is not obvious, and will
+need the coder to work around the apparent type incompatibility.  
+
+If only the type compatibility of passing a smaller integer variable to a larger
+value parameter also worked for a larger var parameter.
+
+Would this be possible?
+
+e.g.
+
+```Modula-2
+  PROCEDURE p(VAR x: LONGINT); BEGIN ... END p;
+
+  PROCEDURE q;
+  VAR r: INTEGER;
+  BEGIN p(r) END q;
+```
+
+q passes an `INTEGER` to the `VAR x: LONGINT` parameter of p. Normally this
+would be a type compatibility error.
+
+If we want to defer value range checking until runtime, the compiler would have
+to behave as if q was written with a temp LONGINT variable like this:
+
+```Modula-2
+  PROCEDURE q;
+  VAR r: INTEGER; temp: LONGINT;
+  BEGIN p(temp); r := SHORT(temp) END q;
+```
+
+Not simple enough.
+
+
+###### IMPORT SYSTEM
+
+With SYSTEM imported, we allow the type INTEGER to be followed by a size and
+sign specification consisting of a letter (U for unsigned or S for signed)
+followed by a numeric bit count which may be 8, 16, 32 or 64. Additionally
+INTEGER may be followed by the word ADDRESS to request an unsigned integer type
+of the same size as a machine address.
+
+Thus we could define
+
+###### Not supported
+
+This solution does not seek to handle architectures such as the 8086/80286 where
+a generalised address is not a single numeric value. TopSpeed Modula handled
+this nicely, but we don't go that far.
diff --git a/src/tools/make/buildall.pl b/src/tools/make/buildall.pl
index ee4bbeac..14e6cd60 100755
--- a/src/tools/make/buildall.pl
+++ b/src/tools/make/buildall.pl
@@ -75,7 +75,7 @@ sub parselog {
   my $tests        = "";
   open(my $log, $fn) // die "Couldn't open build log $fn.";
   while (<$log>) {
-    if (/^([0-9\/]+) [0-9.]+ [^ ]+\.log$/) {$date = $1;}
+    if (/^([0-9\/]+) ([0-9.]+) [^ ]+\.log$/) {$date = $1; $time = $2}  # $1 = date, $2 = time (second group was previously uncaptured, leaving $time undef)
     if (/^[^ ]+ --- Cleaning branch ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ---$/) {
       ($branch, $os, $compiler, $datamodel) = ($1, $2, $3, $4, $5);
     }
@@ -121,8 +121,8 @@ sub svgtext {
 
 my $rows = keys %status;
 
-my $width  = 680;
-my $height = ($rows+2) * $lineheight;
+my $width  = 1080;
+my $height = ($rows+2.2) * $lineheight;
 
 open(my $svg, ">build-status.svg") // die "Could not create build-status.svg.";
 print $svg '<svg width="680" height="', $height, '"';
@@ -132,34 +132,43 @@ print $svg '<rect x="3" y="3" width="', $width-6, '" height="', $height-6, '"';
 print $svg ' rx="20" ry="20" fill="#404040"';
 print $svg ' stroke="#20c020" stroke-width="4"/>', "\n";
 
-my $col1 = 20;
-my $col2 = 110;
-my $col3 = 200;
-my $col4 = 310;
-my $col5 = 400;
-my $col6 = 475;
-my $col7 = 580;
+my $col1  = 20;
+my $col2  = 120;
+my $col3  = 220;
+my $col4  = 320;
+my $col5  = 420;
+my $col6  = 520;
+my $col7  = 620;
+my $col8  = 720;
+my $col9  = 820;
+my $col10 = 920;
 
-svgtext($svg, $col1, 0, "#e0e0e0", "OS");
-svgtext($svg, $col2, 0, "#e0e0e0", "Compiler");
-svgtext($svg, $col3, 0, "#e0e0e0", "Data model");
-svgtext($svg, $col4, 0, "#e0e0e0", "Compiler");
-svgtext($svg, $col5, 0, "#e0e0e0", "Library");
-svgtext($svg, $col6, 0, "#e0e0e0", "C Source");
-svgtext($svg, $col7, 0, "#e0e0e0", "Tests");
+svgtext($svg, $col1,  0, "#e0e0e0", "Date");
+svgtext($svg, $col2,  0, "#e0e0e0", "Time");
+svgtext($svg, $col3,  0, "#e0e0e0", "Branch");
+svgtext($svg, $col4,  0, "#e0e0e0", "OS");
+svgtext($svg, $col5,  0, "#e0e0e0", "Compiler");
+svgtext($svg, $col6,  0, "#e0e0e0", "Data model");
+svgtext($svg, $col7,  0, "#e0e0e0", "Compiler");
+svgtext($svg, $col8,  0, "#e0e0e0", "Library");
+svgtext($svg, $col9,  0, "#e0e0e0", "C Source");
+svgtext($svg, $col10, 0, "#e0e0e0", "Tests");
 
 my $i=1;
 for my $key (sort keys %status) {
   my ($fn, $date, $time, $os, $compiler, $datamodel, $branch,
       $compilerok, $libraryok, $sourcechange, $tests) = @{$status{$key}};
   print $svg '<a xlink:href="', $fn, '">';
-  svgtext($svg, $col1, $i, "#c0c0c0", $os);
-  svgtext($svg, $col2, $i, "#c0c0c0", $compiler);
-  svgtext($svg, $col3, $i, "#c0c0c0", $datamodel);
-  svgtext($svg, $col4, $i, "#60ff60", $compilerok);
-  svgtext($svg, $col5, $i, "#60ff60", $libraryok);
-  svgtext($svg, $col6, $i, "#60ff60", $sourcechange);
-  svgtext($svg, $col7, $i, "#60ff60", $tests);
+  svgtext($svg, $col1,  $i, "#c0c0c0", $date);
+  svgtext($svg, $col2,  $i, "#c0c0c0", $time);
+  svgtext($svg, $col3,  $i, "#c0c0c0", $branch);
+  svgtext($svg, $col4,  $i, "#c0c0c0", $os);
+  svgtext($svg, $col5,  $i, "#c0c0c0", $compiler);
+  svgtext($svg, $col6,  $i, "#c0c0c0", $datamodel);
+  svgtext($svg, $col7,  $i, "#60ff60", $compilerok);
+  svgtext($svg, $col8,  $i, "#60ff60", $libraryok);
+  svgtext($svg, $col9,  $i, "#60ff60", $sourcechange);
+  svgtext($svg, $col10, $i, "#60ff60", $tests);
   print $svg '</a>';
   $i++;
 }
diff --git a/src/tools/make/checksumtest.sh b/src/tools/make/checksumtest.sh
deleted file mode 100644
index 47926771..00000000
--- a/src/tools/make/checksumtest.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-# Checksum tests
-#
-# Checksums object binaries (*.o) and compares with previous
-# checksum for this branch.
-#
-# Note: OPC.o and OPM.o are omitted as they both have text string constants
-# that include the build date.
-
-
-md5=md5
-if which md5sum >/dev/null 2>&1; then md5=md5sum; fi
-if [ -f $1 ]
-then
-  $md5 *.o | egrep -v "OP[CM]\\.o" >newsums
-  if diff -b $1 newsums
-  then printf "\n--- Object file checksums match ---\n"
-  else printf "\n--- Object file checksum mismatch ---\n"
-  fi
-  rm newsums
-else
-  $md5 *.o | egrep -v "OP[CM]\\.o" >$1
-  printf "\n--- Object files checksummed ---\n"
-fi