Update
This commit is contained in:
parent
b931aa36c3
commit
b6b5090c4c
5 changed files with 183 additions and 26 deletions
155
bytecode.md
155
bytecode.md
|
@ -4,7 +4,7 @@ TODO
|
|||
|
||||
## Example
|
||||
|
||||
Let's consider a simple algorithm that tests the [Collatz conjecture](collatz_conjecture.md) (which says that applying a simple operation from any starting number over and over will always lead to number 1). The algorithm in [C](c.md) would look as follows:
|
||||
Let's consider a simple algorithm that tests the [Collatz conjecture](collatz_conjecture.md) (which says that applying a simple operation from any starting number over and over will always lead to the number 1). The program reads a number (one digit for simplicity) and then prints the sequence until reaching the final number 1. The algorithm in [C](c.md) would look as follows:
|
||||
|
||||
```
|
||||
// Collatz conjecture
|
||||
|
@ -35,7 +35,88 @@ int main(void)
|
|||
}
|
||||
```
|
||||
|
||||
The program reads a number (one digit for simplicity) and then prints the sequence until reaching the final number 1. Now let's rewrite the same algorithm in [comun](comun.md), a language which will allow us to produce bytecode:
|
||||
C will normally be compiled to [machine code](machine_code.md), however we can take a look at some intermediate representation bytecode that compilers internally use to generate the machine code. The following is [LLVM](llvm.md), a widely used bytecode that can be produced from the above C code with the [clang](clang.md) compiler (e.g. as `clang -cc1 tmp.c -S -emit-llvm -o -`):
|
||||
|
||||
```
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-pc-linux-gnu"
|
||||
|
||||
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
|
||||
|
||||
; Function Attrs: noinline nounwind optnone
|
||||
define i32 @next(i32 %n) #0 {
|
||||
entry:
|
||||
%n.addr = alloca i32, align 4
|
||||
store i32 %n, i32* %n.addr, align 4
|
||||
%0 = load i32, i32* %n.addr, align 4
|
||||
%rem = srem i32 %0, 2
|
||||
%tobool = icmp ne i32 %rem, 0
|
||||
br i1 %tobool, label %cond.true, label %cond.false
|
||||
|
||||
cond.true: ; preds = %entry
|
||||
%1 = load i32, i32* %n.addr, align 4
|
||||
%mul = mul nsw i32 3, %1
|
||||
%add = add nsw i32 %mul, 1
|
||||
br label %cond.end
|
||||
|
||||
cond.false: ; preds = %entry
|
||||
%2 = load i32, i32* %n.addr, align 4
|
||||
%div = sdiv i32 %2, 2
|
||||
br label %cond.end
|
||||
|
||||
cond.end: ; preds = %cond.false, %cond.true
|
||||
%cond = phi i32 [ %add, %cond.true ], [ %div, %cond.false ]
|
||||
ret i32 %cond
|
||||
}
|
||||
|
||||
; Function Attrs: noinline nounwind optnone
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%retval = alloca i32, align 4
|
||||
%n = alloca i32, align 4
|
||||
store i32 0, i32* %retval, align 4
|
||||
%call = call i32 (...) @getchar()
|
||||
%sub = sub nsw i32 %call, 48
|
||||
store i32 %sub, i32* %n, align 4
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %entry, %if.end
|
||||
%0 = load i32, i32* %n, align 4
|
||||
%call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0)
|
||||
%1 = load i32, i32* %n, align 4
|
||||
%cmp = icmp eq i32 %1, 1
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %while.body
|
||||
br label %while.end
|
||||
|
||||
if.end: ; preds = %while.body
|
||||
%2 = load i32, i32* %n, align 4
|
||||
%call2 = call i32 @next(i32 %2)
|
||||
store i32 %call2, i32* %n, align 4
|
||||
br label %while.body
|
||||
|
||||
while.end: ; preds = %if.then
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
declare i32 @getchar(...) #1
|
||||
|
||||
declare i32 @printf(i8*, ...) #1
|
||||
|
||||
attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.ident = !{!1}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!"clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)"}
|
||||
```
|
||||
|
||||
TODO: analyze the above
|
||||
|
||||
Now let's rewrite the same algorithm in [comun](comun.md), a different language which will allow us to produce another kind of bytecode (obtained with `comun -T program.cmn`):
|
||||
|
||||
```
|
||||
# Collatz conjecture
|
||||
|
@ -65,7 +146,7 @@ next:
|
|||
.
|
||||
```
|
||||
|
||||
Here is annotated bytecode this compiles to:
|
||||
Here is the annotated comun bytecode this compiles to:
|
||||
|
||||
```
|
||||
000000: DES 00 0111 # func \ next:
|
||||
|
@ -127,4 +208,70 @@ Here is annotated bytecode this compiles to:
|
|||
000038: END 00 0000
|
||||
```
|
||||
|
||||
TODO: analyze the above, show other bytecodes (python, java, ...)
|
||||
TODO: analyze the above, show other bytecodes (python, java, ...)
|
||||
|
||||
Let's try the same in [Python](python.md). The code we'll examine will look like this:
|
||||
|
||||
```
|
||||
# Collatz conjecture
|
||||
|
||||
def next(n):
|
||||
return 3 * n + 1 if n % 2 != 0 else n / 2
|
||||
|
||||
n = ord(raw_input()[0]) - ord('0')
|
||||
|
||||
while True:
|
||||
print(n)
|
||||
|
||||
if n == 1:
|
||||
break
|
||||
|
||||
n = next(n)
|
||||
```
|
||||
|
||||
And the bytecode we get (e.g. with `python -m dis program.py`):
|
||||
|
||||
```
|
||||
3 0 LOAD_CONST 0 (<code object next at ...)
|
||||
3 MAKE_FUNCTION 0
|
||||
6 STORE_NAME 0 (next)
|
||||
|
||||
6 9 LOAD_NAME 1 (ord)
|
||||
12 LOAD_NAME 2 (raw_input)
|
||||
15 CALL_FUNCTION 0
|
||||
18 LOAD_CONST 1 (0)
|
||||
21 BINARY_SUBSCR
|
||||
22 CALL_FUNCTION 1
|
||||
25 LOAD_NAME 1 (ord)
|
||||
28 LOAD_CONST 2 ('0')
|
||||
31 CALL_FUNCTION 1
|
||||
34 BINARY_SUBTRACT
|
||||
35 STORE_NAME 3 (n)
|
||||
|
||||
8 38 SETUP_LOOP 43 (to 84)
|
||||
>> 41 LOAD_NAME 4 (True)
|
||||
44 POP_JUMP_IF_FALSE 83
|
||||
|
||||
9 47 LOAD_NAME 3 (n)
|
||||
50 PRINT_ITEM
|
||||
51 PRINT_NEWLINE
|
||||
|
||||
11 52 LOAD_NAME 3 (n)
|
||||
55 LOAD_CONST 3 (1)
|
||||
58 COMPARE_OP 2 (==)
|
||||
61 POP_JUMP_IF_FALSE 68
|
||||
|
||||
12 64 BREAK_LOOP
|
||||
65 JUMP_FORWARD 0 (to 68)
|
||||
|
||||
14 >> 68 LOAD_NAME 0 (next)
|
||||
71 LOAD_NAME 3 (n)
|
||||
74 CALL_FUNCTION 1
|
||||
77 STORE_NAME 3 (n)
|
||||
80 JUMP_ABSOLUTE 41
|
||||
>> 83 POP_BLOCK
|
||||
>> 84 LOAD_CONST 4 (None)
|
||||
87 RETURN_VALUE
|
||||
```
|
||||
|
||||
TODO: make sense of it and analyze it
|
Loading…
Add table
Add a link
Reference in a new issue