Update

2023-12-22 00:43:13 +01:00 · 2023-12-22 00:43:13 +01:00 · b6b5090c4c
commit b6b5090c4c
parent b931aa36c3
5 changed files with 183 additions and 26 deletions
--- a/bytecode.md
+++ b/bytecode.md
@@ -4,7 +4,7 @@ TODO

 ## Example

-Let's consider a simple algorithm that tests the [Collatz conjecture](collatz_conjecture.md) (which says that applying a simple operation from any starting number over and over will always lead to number 1). The algorithm in [C](c.md) would look as follows:
+Let's consider a simple algorithm that tests the [Collatz conjecture](collatz_conjecture.md) (which says that applying a simple operation from any starting number over and over will always lead to number 1). The program reads a number (one digit for simplicity) and then prints the sequence until reaching the final number 1. The algorithm in [C](c.md) would look as follows:

 ```
 // Collatz conjecture
@@ -35,7 +35,88 @@ int main(void)
 }
 ```

-The program reads a number (one digit for simplicity) and then prints the sequence until reaching the final number 1. Now let's rewrite the same algorithm in [comun](comun.md), a language which will allow us to produce bytecode:
+C will normally be compiled to [machine code](machine_code.md), however we can take a look at some intermediate representation bytecode that compilers internally use to generate the machine code. The following is [LLVM](llvm.md), a widely used bytecode that can be produced from the above C code with the [clang](clang.md) compiler (e.g. as `clang -cc1 tmp.c -S -emit-llvm -o -`):
+
+```
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind optnone
+define i32 @next(i32 %n) #0 {
+entry:
+  %n.addr = alloca i32, align 4
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
+  %rem = srem i32 %0, 2
+  %tobool = icmp ne i32 %rem, 0
+  br i1 %tobool, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %1 = load i32, i32* %n.addr, align 4
+  %mul = mul nsw i32 3, %1
+  %add = add nsw i32 %mul, 1
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %2 = load i32, i32* %n.addr, align 4
+  %div = sdiv i32 %2, 2
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %add, %cond.true ], [ %div, %cond.false ]
+  ret i32 %cond
+}
+
+; Function Attrs: noinline nounwind optnone
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %n = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  %call = call i32 (...) @getchar()
+  %sub = sub nsw i32 %call, 48
+  store i32 %sub, i32* %n, align 4
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %if.end
+  %0 = load i32, i32* %n, align 4
+  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0)
+  %1 = load i32, i32* %n, align 4
+  %cmp = icmp eq i32 %1, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %while.body
+  br label %while.end
+
+if.end:                                           ; preds = %while.body
+  %2 = load i32, i32* %n, align 4
+  %call2 = call i32 @next(i32 %2)
+  store i32 %call2, i32* %n, align 4
+  br label %while.body
+
+while.end:                                        ; preds = %if.then
+  ret i32 0
+}
+
+declare i32 @getchar(...) #1
+
+declare i32 @printf(i8*, ...) #1
+
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)"}
+```
+
+TODO: analyze the above
+
+Now let's rewrite the same algorithm in [comun](comun.md), a different language which will allow us to produce another kind of bytecode (obtained with `comun -T program.cmn`):

 ```
 # Collatz conjecture
@@ -65,7 +146,7 @@ next:
 .
 ```

-Here is annotated bytecode this compiles to:
+Here is the annotated comun bytecode this compiles to:

 ```
 000000: DES  00 0111    # func      \ next:
@@ -127,4 +208,70 @@ Here is annotated bytecode this compiles to:
 000038: END  00 0000
 ```

-TODO: analyze the above, show other bytecodes (python, java, ...)
+TODO: analyze the above, show other bytecodes (python, java, ...)
+
+Let's try the same in [Python](python.md). The code we'll examine will look like this:
+
+```
+# Collatz conjecture
+
+def next(n):
+  return 3 * n + 1 if n % 2 != 0 else n / 2
+
+n = ord(raw_input()[0]) - ord('0')
+
+while True:
+  print(n)
+
+  if n == 1:
+    break
+
+  n = next(n)
+```
+
+And the bytecode we get (e.g. with `python -m dis program.py`; note this is Python 2 code and bytecode, Python 3 would produce different instructions):
+
+```
+ 3       0 LOAD_CONST           0 (<code object next at ...)
+         3 MAKE_FUNCTION        0
+         6 STORE_NAME           0 (next)
+
+ 6       9 LOAD_NAME            1 (ord)
+        12 LOAD_NAME            2 (raw_input)
+        15 CALL_FUNCTION        0
+        18 LOAD_CONST           1 (0)
+        21 BINARY_SUBSCR       
+        22 CALL_FUNCTION        1
+        25 LOAD_NAME            1 (ord)
+        28 LOAD_CONST           2 ('0')
+        31 CALL_FUNCTION        1
+        34 BINARY_SUBTRACT     
+        35 STORE_NAME           3 (n)
+
+ 8      38 SETUP_LOOP          43 (to 84)
+    >>  41 LOAD_NAME            4 (True)
+        44 POP_JUMP_IF_FALSE   83
+
+ 9      47 LOAD_NAME            3 (n)
+        50 PRINT_ITEM          
+        51 PRINT_NEWLINE       
+
+11      52 LOAD_NAME            3 (n)
+        55 LOAD_CONST           3 (1)
+        58 COMPARE_OP           2 (==)
+        61 POP_JUMP_IF_FALSE   68
+
+12      64 BREAK_LOOP          
+        65 JUMP_FORWARD         0 (to 68)
+
+14  >>  68 LOAD_NAME            0 (next)
+        71 LOAD_NAME            3 (n)
+        74 CALL_FUNCTION        1
+        77 STORE_NAME           3 (n)
+        80 JUMP_ABSOLUTE       41
+    >>  83 POP_BLOCK       
+    >>  84 LOAD_CONST           4 (None)
+        87 RETURN_VALUE
+```
+
+TODO: make sense of it and analyze it