277 lines
7.6 KiB
Markdown
277 lines
7.6 KiB
Markdown
# Bytecode
|
|
|
|
TODO
|
|
|
|
## Example
|
|
|
|
Let's consider a simple algorithm that tests the [Collatz conjecture](collatz_conjecture.md) (which says that applying a simple operation from any starting number over and over will always lead to number 1). The program reads a number (one digit for simplicity) and then prints the sequence until reaching the final number 1. The algorithm in [C](c.md) would look as follows:
|
|
|
|
```
|
|
// Collatz conjecture
|
|
#include <stdio.h>
|
|
|
|
int next(int n)
|
|
{
|
|
return n % 2 ? // is odd?
|
|
3 * n + 1 :
|
|
n / 2;
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
int n = getchar() - '0'; // read input ASCII digit
|
|
|
|
while (1)
|
|
{
|
|
printf("%d\n",n);
|
|
|
|
if (n == 1)
|
|
break;
|
|
|
|
n = next(n);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
```
|
|
|
|
C will be normally compiled to [machine code](machine_code.md), however we can take a look at some immediate representation bytecode that compilers internally use to generate the machine code. The following is [LLVM](llvm.md), a widely used bytecode that can be produced from the above C code with [clang](clang.md) compiler (e.g. as `clang -cc1 tmp.c -S -emit-llvm -o -`):
|
|
|
|
```
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-pc-linux-gnu"
|
|
|
|
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
|
|
|
|
; Function Attrs: noinline nounwind optnone
|
|
define i32 @next(i32 %n) #0 {
|
|
entry:
|
|
%n.addr = alloca i32, align 4
|
|
store i32 %n, i32* %n.addr, align 4
|
|
%0 = load i32, i32* %n.addr, align 4
|
|
%rem = srem i32 %0, 2
|
|
%tobool = icmp ne i32 %rem, 0
|
|
br i1 %tobool, label %cond.true, label %cond.false
|
|
|
|
cond.true: ; preds = %entry
|
|
%1 = load i32, i32* %n.addr, align 4
|
|
%mul = mul nsw i32 3, %1
|
|
%add = add nsw i32 %mul, 1
|
|
br label %cond.end
|
|
|
|
cond.false: ; preds = %entry
|
|
%2 = load i32, i32* %n.addr, align 4
|
|
%div = sdiv i32 %2, 2
|
|
br label %cond.end
|
|
|
|
cond.end: ; preds = %cond.false, %cond.true
|
|
%cond = phi i32 [ %add, %cond.true ], [ %div, %cond.false ]
|
|
ret i32 %cond
|
|
}
|
|
|
|
; Function Attrs: noinline nounwind optnone
|
|
define i32 @main() #0 {
|
|
entry:
|
|
%retval = alloca i32, align 4
|
|
%n = alloca i32, align 4
|
|
store i32 0, i32* %retval, align 4
|
|
%call = call i32 (...) @getchar()
|
|
%sub = sub nsw i32 %call, 48
|
|
store i32 %sub, i32* %n, align 4
|
|
br label %while.body
|
|
|
|
while.body: ; preds = %entry, %if.end
|
|
%0 = load i32, i32* %n, align 4
|
|
%call1 = call i32 (i8*, ...) @printf(i8* ... )
|
|
%1 = load i32, i32* %n, align 4
|
|
%cmp = icmp eq i32 %1, 1
|
|
br i1 %cmp, label %if.then, label %if.end
|
|
|
|
if.then: ; preds = %while.body
|
|
br label %while.end
|
|
|
|
if.end: ; preds = %while.body
|
|
%2 = load i32, i32* %n, align 4
|
|
%call2 = call i32 @next(i32 %2)
|
|
store i32 %call2, i32* %n, align 4
|
|
br label %while.body
|
|
|
|
while.end: ; preds = %if.then
|
|
ret i32 0
|
|
}
|
|
|
|
declare i32 @getchar(...) #1
|
|
|
|
declare i32 @printf(i8*, ...) #1
|
|
|
|
attributes #0 = { ... }
|
|
attributes #1 = { ... }
|
|
|
|
!llvm.module.flags = !{!0}
|
|
!llvm.ident = !{!1}
|
|
|
|
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
!1 = !{!"clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)"}
|
|
```
|
|
|
|
TODO: analyze the above
|
|
|
|
Now let's rewrite the same algorithm in [comun](comun.md), a different language which will allow us to produce another kind of bytecode (obtained with `comun -T program.cmn`):
|
|
|
|
```
|
|
# Collatz conjecture
|
|
|
|
next:
|
|
$0 2 % ? # is odd?
|
|
3 * 1 +
|
|
;
|
|
2 /
|
|
.
|
|
.
|
|
|
|
<- # read input ASCII digit
|
|
"0" - # convert it to number
|
|
|
|
@@
|
|
# print:
|
|
$0 10 / "0" + ->
|
|
$0 10 % "0" + ->
|
|
10 ->
|
|
|
|
$0 1 = ?
|
|
!@
|
|
.
|
|
|
|
next
|
|
.
|
|
```
|
|
|
|
Here is annotated comun bytecode this compiles to:
|
|
|
|
```
|
|
000000: DES 00 0111 # func \ next:
|
|
000001: JMA 00 0100... # 20 (#14) |
|
|
000002: COC 00 0001 |
|
|
000003: MGE 00 0000 | $0
|
|
000004: CON' 00 0010 # 2 (#2) | 2
|
|
000005: MOX 00 0000 | %
|
|
000006: DES 00 0001 # if | \ ?
|
|
000007: JNA 00 0000... # 16 (#10) | |
|
|
000008: COC 00 0001 | |
|
|
000009: CON' 00 0011 # 3 (#3) | | 3
|
|
00000a: MUX 00 0000 | | *
|
|
00000b: CON' 00 0001 # 1 (#1) | | 1
|
|
00000c: ADX 00 0000 | | +
|
|
00000d: DES 00 0010 # else | < ;
|
|
00000e: JMA 00 0011... # 19 (#13) | |
|
|
00000f: COC 00 0001 | |
|
|
000010: CON' 00 0010 # 2 (#2) | | 2
|
|
000011: DIX 00 0000 | | /
|
|
000012: DES 00 0011 # end if | / .
|
|
000013: RET 00 0000 / .
|
|
000014: INI 00 0000
|
|
000015: INP 00 0000 <-
|
|
000016: CON' 00 0000... # 48 (#30) "0"
|
|
000017: COC 00 0011
|
|
000018: SUX 00 0000 -
|
|
000019: DES 00 0100 # loop \ @@
|
|
00001a: MGE 00 0000 | $0
|
|
00001b: CON' 00 1010 # 10 (#a) | 10
|
|
00001c: DIX 00 0000 | /
|
|
00001d: CON' 00 0000... # 48 (#30) | "0"
|
|
00001e: COC 00 0011 |
|
|
00001f: ADX 00 0000 | +
|
|
000020: OUT 00 0000 | ->
|
|
000021: MGE 00 0000 | $0
|
|
000022: CON' 00 1010 # 10 (#a) | 10
|
|
000023: MOX 00 0000 | %
|
|
000024: CON' 00 0000... # 48 (#30) | "0"
|
|
000025: COC 00 0011 |
|
|
000026: ADX 00 0000 | +
|
|
000027: OUT 00 0000 | ->
|
|
000028: CON' 00 1010 # 10 (#a) | 10
|
|
000029: OUT 00 0000 | ->
|
|
00002a: MGE 00 0000 | $0
|
|
00002b: CON' 00 0001 # 1 (#1) | 1
|
|
00002c: EQX 00 0000 | =
|
|
00002d: DES 00 0001 # if | \ ?
|
|
00002e: JNA 00 0100... # 52 (#34) | |
|
|
00002f: COC 00 0011 | |
|
|
000030: DES 00 0101 # break | | !@
|
|
000031: JMA 00 1000... # 56 (#38) | |
|
|
000032: COC 00 0011 | |
|
|
000033: DES 00 0011 # end if | / .
|
|
000034: CAL 00 0011 # 3 (#3) | next
|
|
000035: DES 00 0110 # end loop / .
|
|
000036: JMA 00 1010... # 26 (#1a)
|
|
000037: COC 00 0001
|
|
000038: END 00 0000
|
|
```
|
|
|
|
TODO: analyze the above, show other bytecodes (python, java, ...)
|
|
|
|
Let's try the same in [Python](python.md). The code we'll examine will look like this:
|
|
|
|
```
|
|
# Collatz conjecture
|
|
|
|
def next(n):
|
|
return 3 * n + 1 if n % 2 != 0 else n / 2
|
|
|
|
n = ord(raw_input()[0]) - ord('0')
|
|
|
|
while True:
|
|
print(n)
|
|
|
|
if n == 1:
|
|
break
|
|
|
|
n = next(n)
|
|
```
|
|
|
|
And the bytecode we get (e.g. with `python -m dis program.py`):
|
|
|
|
```
|
|
3 0 LOAD_CONST 0 (<code object next at ...)
|
|
3 MAKE_FUNCTION 0
|
|
6 STORE_NAME 0 (next)
|
|
|
|
6 9 LOAD_NAME 1 (ord)
|
|
12 LOAD_NAME 2 (raw_input)
|
|
15 CALL_FUNCTION 0
|
|
18 LOAD_CONST 1 (0)
|
|
21 BINARY_SUBSCR
|
|
22 CALL_FUNCTION 1
|
|
25 LOAD_NAME 1 (ord)
|
|
28 LOAD_CONST 2 ('0')
|
|
31 CALL_FUNCTION 1
|
|
34 BINARY_SUBTRACT
|
|
35 STORE_NAME 3 (n)
|
|
|
|
8 38 SETUP_LOOP 43 (to 84)
|
|
>> 41 LOAD_NAME 4 (True)
|
|
44 POP_JUMP_IF_FALSE 83
|
|
|
|
9 47 LOAD_NAME 3 (n)
|
|
50 PRINT_ITEM
|
|
51 PRINT_NEWLINE
|
|
|
|
11 52 LOAD_NAME 3 (n)
|
|
55 LOAD_CONST 3 (1)
|
|
58 COMPARE_OP 2 (==)
|
|
61 POP_JUMP_IF_FALSE 68
|
|
|
|
12 64 BREAK_LOOP
|
|
65 JUMP_FORWARD 0 (to 68)
|
|
|
|
14 >> 68 LOAD_NAME 0 (next)
|
|
71 LOAD_NAME 3 (n)
|
|
74 CALL_FUNCTION 1
|
|
77 STORE_NAME 3 (n)
|
|
80 JUMP_ABSOLUTE 41
|
|
>> 83 POP_BLOCK
|
|
>> 84 LOAD_CONST 4 (None)
|
|
87 RETURN_VALUE
|
|
```
|
|
|
|
TODO: make sense of it and analyze it |