;;; x86_64.ss ;;; Copyright 1984-2017 Cisco Systems, Inc. ;;; ;;; Licensed under the Apache License, Version 2.0 (the "License"); ;;; you may not use this file except in compliance with the License. ;;; You may obtain a copy of the License at ;;; ;;; http://www.apache.org/licenses/LICENSE-2.0 ;;; ;;; Unless required by applicable law or agreed to in writing, software ;;; distributed under the License is distributed on an "AS IS" BASIS, ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ;;; See the License for the specific language governing permissions and ;;; limitations under the License. ;;; SECTION 1: registers (if-feature windows (define-registers (reserved [%tc %r14 #t 14] [%sfp %r13 #t 13] [%ap %rdi #t 7] #;[%esp] #;[%eap] #;[%trap]) (allocable [%ac0 %rbp #t 5] [%xp %r12 #t 12] [%ts %rax %Cretval #f 0] [%td %rbx #t 3] [%ac1 %r10 %deact #f 10] [%yp %r11 #f 11] [%cp %r15 #t 15] [#;%ret %rsi #t 6] [ %rcx %Carg1 #f 1] [ %rdx %Carg2 #f 2] [ %r8 %Carg3 #f 8] [ %r9 %Carg4 #f 9]) (machine-dependent [%Cfparg1 %Cfpretval #f 0] [%Cfparg2 #f 1] [%Cfparg3 #f 2] [%Cfparg4 #f 3] [%flreg1 #f 4] ; xmm 0-5 are caller-save [%flreg2 #f 5] ; xmm 6-15 are callee-save [%sp #t 4])) (define-registers (reserved [%tc %r14 #t 14] [%sfp %r13 #t 13] [%ap %r9 %Carg6 #f 9] #;[%esp] #;[%eap] #;[%trap]) (allocable [%ac0 %rbp #t 5] [%xp %r12 #t 12] [%ts %rax %Cretval #f 0] [%td %rbx #t 3] [%ac1 %r10 %deact #f 10] [%yp %r11 #f 11] [%cp %r15 #t 15] [#;%ret %r8 %Carg5 #f 8] [ %rdi %Carg1 #f 7] [ %rsi %Carg2 #f 6] [ %rdx %Carg3 #f 2] [ %rcx %Carg4 #f 1]) (machine-dependent [%Cfparg1 %Cfpretval #f 0] [%Cfparg2 #f 1] [%Cfparg3 #f 2] [%Cfparg4 #f 3] [%Cfparg5 #f 4] [%Cfparg6 #f 5] [%Cfparg7 #f 6] [%Cfparg8 #f 7] [%flreg1 #f 8] [%flreg2 #f 9] [%sp #t 4]))) ;;; SECTION 2: instructions (module (md-handle-jump) ; also sets primitive handlers (import asm-module) (define-syntax seq (lambda (x) (syntax-case x () [(_ e ... ex) (with-syntax ([(t ...) (generate-temporaries #'(e ...))]) #'(let ([t e] ...) (with-values ex (case-lambda [(x*) (cons* t ... x*)] [(x* p) (values (cons* t ... x*) p)]))))]))) ; don't bother with literal@? check since lvalues can't be literals (define lmem? mref?) (define mem? (lambda (x) (or (lmem? x) (literal@? x)))) (define real-imm32? (lambda (x) (nanopass-case (L15c Triv) x [(immediate ,imm) (constant-case ptr-bits [(32) #t] ; allows 2^31...2^32-1 per immediate? [(64) (signed-32? imm)])] ; 2^31...2^32-1 aren't 32-bit values on 64-bit machines [else #f]))) (define negatable-real-imm32? (lambda (x) (nanopass-case (L15c Triv) x [(immediate ,imm) (<= #x-7FFFFFFF imm #x7FFFFFFF)] [else #f]))) (define lvalue->ur (lambda (x k) (if (mref? x) (let ([u (make-tmp 'u)]) (seq (set-ur=mref u x) (k u))) (k x)))) (define mref->mref (lambda (a k) (define return (lambda (x0 x1 imm) (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm))))) (nanopass-case (L15c Triv) a [(mref ,lvalue0 ,lvalue1 ,imm) (lvalue->ur lvalue0 (lambda (x0) (lvalue->ur lvalue1 (lambda (x1) (if (signed-32? imm) (return x0 x1 imm) (let ([u (make-tmp 'u)]) (seq (build-set! ,u (immediate ,imm)) (if (eq? x1 %zero) (return x0 u 0) (seq (build-set! ,u (asm ,null-info ,asm-add ,u ,x1)) (return x0 u 0))))))))))]))) (define mem->mem (lambda (a k) (cond [(literal@? a) (let ([u (make-tmp 'u)]) (seq (build-set! ,u ,(literal@->literal a)) (k (with-output-language (L15d Lvalue) `(mref ,u ,%zero 0)))))] [else (mref->mref a k)]))) (define-syntax coercible? (syntax-rules () [(_ ?a ?aty*) (let ([a ?a] [aty* ?aty*]) (or (memq 'ur aty*) (or (and (memq 'imm32 aty*) (imm32? a)) (and (memq 'imm aty*) (imm? a)) (and (memq 'zero aty*) (imm0? a)) (and (memq 'real-imm32 aty*) (real-imm32? a)) (and (memq 'negatable-real-imm32 aty*) (negatable-real-imm32? a)) (and (memq 'mem aty*) (mem? a)))))])) (define-syntax coerce-opnd ; passes k something compatible with aty* (syntax-rules () [(_ ?a ?aty* ?k) (let ([a ?a] [aty* ?aty*] [k ?k]) (cond [(and (memq 'mem aty*) (mem? a)) (mem->mem a k)] [(and (memq 'imm32 aty*) (imm32? a)) (k (imm->imm a))] [(and (memq 'imm aty*) (imm? a)) (k (imm->imm a))] [(and (memq 'zero aty*) (imm0? a)) (k (imm->imm a))] [(and (memq 'real-imm32 aty*) (real-imm32? a)) (k (imm->imm a))] [(and (memq 'negatable-real-imm32 aty*) (negatable-real-imm32? a)) (k (imm->imm a))] [(memq 'ur aty*) (cond [(ur? a) (k a)] [(imm? a) (let ([u (make-tmp 'u)]) (seq (build-set! ,u ,(imm->imm a)) (k u)))] [(mem? a) (mem->mem a (lambda (a) (let ([u (make-tmp 'u)]) (seq (build-set! ,u ,a) (k u)))))] [else (sorry! 'coerce-opnd "unexpected operand ~s" a)])] [else (sorry! 'coerce-opnd "cannot coerce ~s to ~s" a aty*)]))])) (define set-ur=mref (lambda (ur mref) (mref->mref mref (lambda (mref) (build-set! ,ur ,mref))))) (define-who extract-imm (lambda (e) (nanopass-case (L15d Triv) e [(immediate ,imm) imm] [else (sorry! who "~s is not an immediate" e)]))) (define md-handle-jump (lambda (t) (with-output-language (L15d Tail) (nanopass-case (L15c Triv) t [,lvalue (if (mem? lvalue) (mem->mem lvalue (lambda (mref) (values '() `(jump ,mref)))) (values '() `(jump ,lvalue)))] [(literal ,info) (guard (and (not (info-literal-indirect? info)) (memq (info-literal-type info) '(entry library-code)))) (values '() `(jump (literal ,info)))] [(label-ref ,l ,offset) (values '() `(jump (label-ref ,l ,offset)))] [else (let ([tmp (make-tmp 'utmp)]) (values (with-output-language (L15d Effect) `(set! ,(make-live-info) ,tmp ,t)) `(jump ,tmp)))])))) (define-syntax define-instruction (lambda (x) (define acsame-mem (lambda (c a b bty* k) #`(lambda (c a b) (if (and (lmem? c) (same? a c) (coercible? b '#,bty*)) (coerce-opnd b '#,bty* (lambda (b) (mem->mem c (lambda (c) (#,k c b))))) (next c a b))))) (define-who acsame-ur (lambda (c a b bty* k) #`(lambda (c a b) (if (and (same? a c) (coercible? b '#,bty*)) (coerce-opnd b '#,bty* (lambda (b) (cond [(ur? c) (#,k c b)] [(mref? c) (nanopass-case (L15c Triv) c [(mref ,lvalue0 ,lvalue1 ,imm) ; TODO: does this use too many registers? (no longer special casing fv x0, x1 case) (lvalue->ur lvalue0 (lambda (x0) (lvalue->ur lvalue1 (lambda (x1) (let ([u1 (make-tmp 'u)]) (if (signed-32? imm) (seq (build-set! ,u1 (mref ,x0 ,x1 ,imm)) (#,k u1 b) (build-set! (mref ,x0 ,x1 ,imm) ,u1)) (let ([u2 (make-tmp 'u)]) (seq (build-set! ,u2 ,imm) (build-set! ,x1 (asm ,null-info ,asm-add ,x1 ,u2)) (build-set! ,u1 (mref ,x0 ,x1 0)) (#,k u1 b) (build-set! (mref ,x0 ,x1 0) ,u1)))))))))])] ; can't be literal@ since literals can't be lvalues [else (sorry! '#,(datum->syntax #'* who) "unexpected operand ~s" c)]))) (next c a b))))) (define make-value-clause (lambda (fmt) (syntax-case fmt (mem ur xp) [(op (c mem) (a ?c) (b bty* ...)) (bound-identifier=? #'?c #'c) (acsame-mem #'c #'a #'b #'(bty* ...) #'(lambda (c b) (rhs c c b)))] [(op (c ur) (a ?c) (b bty* ...)) (bound-identifier=? #'?c #'c) (acsame-ur #'c #'a #'b #'(bty* ...) #'(lambda (c b) (rhs c c b)))] [(op (c mem) (a aty* ...) (b ?c)) (bound-identifier=? #'?c #'c) (acsame-mem #'c #'b #'a #'(aty* ...) #'(lambda (c a) (rhs c a c)))] [(op (c ur) (a aty* ...) (b ?c)) (bound-identifier=? #'?c #'c) (acsame-ur #'c #'b #'a #'(aty* ...) #'(lambda (c a) (rhs c a c)))] [(op (c mem) (a aty ...) (b bty ...)) #`(lambda (c a b) (if (and (lmem? c) (coercible? a '(aty ...)) (coercible? b '(bty ...))) (coerce-opnd b '(bty ...) (lambda (b) (coerce-opnd a '(aty ...) (lambda (a) (mref->mref c (lambda (c) (rhs c a b))))))) (next c a b)))] [(op (c ur) (a aty ...) (b bty ...)) #`(lambda (c a b) (if (and (coercible? a '(aty ...)) (coercible? b '(bty ...))) (coerce-opnd b '(bty ...) (lambda (b) (coerce-opnd a '(aty ...) (lambda (a) (if (ur? c) (rhs c a b) (let ([u (make-tmp 'u)]) (seq (rhs u a b) (mref->mref c (lambda (c) (build-set! ,c ,u)))))))))) (next c a b)))] ; four-operand case below can require four unspillables [(op (c ur) (a ur) (b ur) (d dty ...)) (not (memq 'mem (datum (dty ...)))) #`(lambda (c a b d) (if (coercible? d '(dty ...)) (coerce-opnd d '(dty ...) (lambda (d) (coerce-opnd a '(ur) (lambda (a) (coerce-opnd b '(ur) (lambda (b) (if (ur? c) (rhs c a b d) (let ([u (make-tmp 'u)]) (seq (rhs u a b d) (mref->mref c (lambda (c) (build-set! ,c ,u)))))))))))) (next c a b d)))] [(op (c mem) (a ?c)) (bound-identifier=? #'?c #'c) #`(lambda (c a) (if (and (lmem? c) (same? c a)) (mem->mem c (lambda (c) (rhs c c))) (next c a)))] [(op (c ur) (a ?c)) (bound-identifier=? #'?c #'c) #`(lambda (c a) (if (same? a c) (if (ur? c) (rhs c c) (mem->mem c (lambda (c) (let ([u (make-tmp 'u)]) (seq (build-set! ,u ,c) (rhs u u) (build-set! ,c ,u)))))) (next c a)))] [(op (c mem) (a aty ...)) #`(lambda (c a) (if (and (lmem? c) (coercible? a '(aty ...))) (coerce-opnd a '(aty ...) (lambda (a) (mem->mem c (lambda (c) (rhs c a))))) (next c a)))] [(op (c ur) (a aty ...)) #`(lambda (c a) (if (coercible? a '(aty ...)) (coerce-opnd a '(aty ...) (lambda (a) (if (ur? c) (rhs c a) (mem->mem c (lambda (c) (let ([u (make-tmp 'u)]) (seq (rhs u a) (build-set! ,c ,u)))))))) (next c a)))] [(op (c ur)) #`(lambda (c) (if (ur? c) (rhs c) (mem->mem c (lambda (c) (let ([u (make-tmp 'u)]) (seq (rhs u) (build-set! ,c ,u)))))))] [(op (c mem)) #`(lambda (c) (if (lmem? c) (mem->mem c (lambda (c) (rhs c))) (next c)))]))) (define-who make-pred-clause (lambda (fmt) (syntax-case fmt () [(op (a aty ...) ...) #`(lambda (a ...) (if (and (coercible? a '(aty ...)) ...) #,(let f ([a* #'(a ...)] [aty** #'((aty ...) ...)]) (if (null? a*) #'(rhs a ...) #`(coerce-opnd #,(car a*) '#,(car aty**) (lambda (#,(car a*)) #,(f (cdr a*) (cdr aty**)))))) (next a ...)))]))) (define-who make-effect-clause (lambda (fmt) (syntax-case fmt () [(op (a aty ...) ...) #`(lambda (a ...) (if (and (coercible? a '(aty ...)) ...) #,(let f ([a* #'(a ...)] [aty** #'((aty ...) ...)]) (if (null? a*) #'(rhs a ...) #`(coerce-opnd #,(car a*) '#,(car aty**) (lambda (#,(car a*)) #,(f (cdr a*) (cdr aty**)))))) (next a ...)))]))) (syntax-case x (definitions) [(k context (sym ...) (definitions defn ...) [(op (a aty ...) ...) ?rhs0 ?rhs1 ...] ...) ; potentially unnecessary level of checking, but the big thing is to make sure ; the number of operands expected is the same on every clause of define-instruction (and (not (null? #'(op ...))) (andmap identifier? #'(sym ...)) (andmap identifier? #'(op ...)) (andmap identifier? #'(a ... ...)) (andmap identifier? #'(aty ... ... ...))) (with-implicit (k info return with-output-language) (with-syntax ([((opnd* ...) . ignore) #'((a ...) ...)]) (define make-proc (lambda (make-clause) (let f ([op* #'(op ...)] [fmt* #'((op (a aty ...) ...) ...)] [arg* #'((a ...) ...)] [rhs* #'((?rhs0 ?rhs1 ...) ...)]) (if (null? op*) #'(lambda (opnd* ...) (sorry! name "no match found for ~s" (list opnd* ...))) #`(let ([next #,(f (cdr op*) (cdr fmt*) (cdr arg*) (cdr rhs*))] [rhs (lambda #,(car arg*) (let ([#,(car op*) name]) #,@(car rhs*)))]) #,(make-clause (car fmt*))))))) (unless (let ([a** #'((a ...) ...)]) (let* ([a* (car a**)] [len (length a*)]) (andmap (lambda (a*) (fx= (length a*) len)) (cdr a**)))) (syntax-error x "mismatched instruction arities")) (cond [(free-identifier=? #'context #'value) #`(let ([fvalue (lambda (name) (lambda (info opnd* ...) defn ... (with-output-language (L15d Effect) (#,(make-proc make-value-clause) opnd* ...))))]) (begin (safe-assert (eq? (primitive-type (%primitive sym)) 'value)) (primitive-handler-set! (%primitive sym) (fvalue 'sym))) ...)] [(free-identifier=? #'context #'pred) #`(let ([fpred (lambda (name) (lambda (info opnd* ...) defn ... (with-output-language (L15d Pred) (#,(make-proc make-pred-clause) opnd* ...))))]) (begin (safe-assert (eq? (primitive-type (%primitive sym)) 'pred)) (primitive-handler-set! (%primitive sym) (fpred 'sym))) ...)] [(free-identifier=? #'context #'effect) #`(let ([feffect (lambda (name) (lambda (info opnd* ...) defn ... (with-output-language (L15d Effect) (#,(make-proc make-effect-clause) opnd* ...))))]) (begin (safe-assert (eq? (primitive-type (%primitive sym)) 'effect)) (primitive-handler-set! (%primitive sym) (feffect 'sym))) ...)] [else (syntax-error #'context "unrecognized context")])))] [(k context (sym ...) cl ...) #'(k context (sym ...) (definitions) cl ...)] [(k context sym cl ...) (identifier? #'sym) #'(k context (sym) (definitions) cl ...)]))) ; x is not the same as z in any clause that follows a clause where (x z) ; and y is coercible to one of its types, however: ; WARNING: do not assume that if x isn't the same as z then x is independent ; of z, since x might be an mref with z as it's base or index (define-instruction value (-) [(op (z mem) (x z) (y ur imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-sub ,x ,y))] [(op (z mem) (x zero) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-negate ,y))] [(op (z ur) (x z) (y ur mem imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-sub ,x ,y))] [(op (z ur) (x zero) (y ur)) (seq `(set! ,(make-live-info) ,z ,y) `(set! ,(make-live-info) ,z (asm ,info ,asm-negate ,z)))] [(op (z ur) (x ur mem imm32) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-sub-negate ,y ,x))] [(op (z ur) (x ur) (y negatable-real-imm32)) (seq `(move-related ,z ,x) `(set! ,(make-live-info) ,z (asm ,info ,(asm-lea1 (- (extract-imm y))) ,x)))] [(op (z ur) (x mem imm32) (y ur)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,y) `(set! ,(make-live-info) ,t (asm ,info ,asm-sub-negate ,t ,x)) `(set! ,(make-live-info) ,z ,t)))] [(op (z ur) (x ur) (y ur mem imm32)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) `(set! ,(make-live-info) ,t (asm ,info ,asm-sub ,t ,y)) `(set! ,(make-live-info) ,z ,t)))]) (define-instruction value (-/ovfl -/eq) ; must set condition codes, so can't use lea or sub-negate [(op (z mem) (x z) (y ur imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-sub ,x ,y))] [(op (z mem) (x zero) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-negate ,y))] [(op (z ur) (x z) (y ur mem imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-sub ,x ,y))] [(op (z ur) (x zero) (y ur)) (seq `(set! ,(make-live-info) ,z ,y) `(set! ,(make-live-info) ,z (asm ,info ,asm-negate ,z)))] [(op (z ur) (x ur) (y ur mem imm32)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) `(set! ,(make-live-info) ,t (asm ,info ,asm-sub ,t ,y)) `(set! ,(make-live-info) ,z ,t)))]) (define-instruction value (+) [(op (z mem) (x z) (y ur imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-add ,z ,y))] [(op (z mem) (x ur imm32) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-add ,z ,x))] [(op (z ur) (x z) (y ur mem imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-add ,z ,y))] [(op (z ur) (x ur mem imm32) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-add ,z ,x))] [(op (z ur) (x ur) (y real-imm32)) (seq `(move-related ,z ,x) `(set! ,(make-live-info) ,z (asm ,info ,(asm-lea1 (extract-imm y)) ,x)))] [(op (z ur) (x real-imm32) (y ur)) (seq `(move-related ,z ,y) `(set! ,(make-live-info) ,z (asm ,info ,(asm-lea1 (extract-imm x)) ,y)))] [(op (z ur) (x ur) (y mem imm32)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) `(set! ,(make-live-info) ,t (asm ,info ,asm-add ,t ,y)) `(set! ,(make-live-info) ,z ,t)))] [(op (z ur) (x mem imm32) (y ur)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,y) `(set! ,(make-live-info) ,t (asm ,info ,asm-add ,t ,x)) `(set! ,(make-live-info) ,z ,t)))] [(op (z ur) (x ur) (y ur)) (seq `(move-related ,z ,y) `(move-related ,z ,x) `(set! ,(make-live-info) ,z (asm ,info ,(asm-lea2 0) ,x ,y)))]) (define-instruction value (+/ovfl +/carry) ; must set condition codes, so can't use lea [(op (z mem) (x z) (y ur imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-add ,z ,y))] [(op (z mem) (x ur imm32) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-add ,z ,x))] [(op (z ur) (x z) (y ur mem imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-add ,z ,y))] [(op (z ur) (x ur mem imm32) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-add ,z ,x))] [(op (z ur) (x ur) (y mem imm32)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) `(set! ,(make-live-info) ,t (asm ,info ,asm-add ,t ,y)) `(set! ,(make-live-info) ,z ,t)))] [(op (z ur) (x mem imm32) (y ur)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,y) `(set! ,(make-live-info) ,t (asm ,info ,asm-add ,t ,x)) `(set! ,(make-live-info) ,z ,t)))] [(op (z ur) (x ur) (y ur)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) `(set! ,(make-live-info) ,t (asm ,info ,asm-add ,t ,y)) `(set! ,(make-live-info) ,z ,t)))]) (define-instruction value (* */ovfl) ; */ovfl must set multiply-overflow flag on overflow [(op (z ur) (x z) (y ur mem)) `(set! ,(make-live-info) ,z (asm ,info ,asm-mul ,z ,y))] [(op (z ur) (x ur mem) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-mul ,z ,x))] [(op (z ur) (x ur mem) (y imm32)) `(set! ,(make-live-info) ,z (asm ,info ,asm-muli ,x ,y))] [(op (z ur) (x imm32) (y ur mem)) `(set! ,(make-live-info) ,z (asm ,info ,asm-muli ,y ,x))] [(op (z ur) (x ur) (y ur)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) `(set! ,(make-live-info) ,t (asm ,info ,asm-mul ,t ,y)) `(set! ,(make-live-info) ,z ,t)))]) (define-instruction value (/) (definitions (define go (lambda (z x y) (let ([urax (make-precolored-unspillable 'urax %rax)] [urdx (make-precolored-unspillable 'urdx %rdx)]) (with-output-language (L15d Effect) (seq `(set! ,(make-live-info) ,urax ,x) `(set! ,(make-live-info) ,urdx (asm ,null-info ,asm-sext-rax->rdx ,urax)) `(set! ,(make-live-info) ,urax (asm ,null-info ,asm-div ,urax ,urdx ,y)) `(set! ,(make-live-info) ,z ,urax))))))) [(op (z mem) (x ur mem imm) (y ur mem)) (go z x y)] [(op (z ur) (x ur mem imm) (y ur mem)) (go z x y)]) (define-instruction value (logand logor logxor) [(op (z mem) (x z) (y ur imm32)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-addop op) ,z ,y))] [(op (z mem) (x ur imm32) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-addop op) ,z ,x))] [(op (z ur) (x z) (y ur mem imm32)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-addop op) ,z ,y))] [(op (z ur) (x ur mem imm32) (y z)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-addop op) ,z ,x))] [(op (z ur) (x ur) (y mem imm32)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) `(set! ,(make-live-info) ,t (asm ,info ,(asm-addop op) ,t ,y)) `(set! ,(make-live-info) ,z ,t)))] [(op (z ur) (x ur mem imm32) (y ur)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,y) `(set! ,(make-live-info) ,t (asm ,info ,(asm-addop op) ,t ,x)) `(set! ,(make-live-info) ,z ,t)))]) (define-instruction value (lognot) [(op (z mem) (x z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-lognot ,x))] [(op (z ur) (x z)) `(set! ,(make-live-info) ,z (asm ,info ,asm-lognot ,x))] [(op (z ur) (x ur mem imm32)) (seq `(set! ,(make-live-info) ,z ,x) `(set! ,(make-live-info) ,z (asm ,info ,asm-lognot ,z)))]) ; TODO: use lea for certain constant shifts when x != z (define-instruction value (sll srl sra) (definitions (define go (lambda (info op z x y) (let ([urcx (make-precolored-unspillable 'urcx %rcx)]) (with-output-language (L15d Effect) (seq `(set! ,(make-live-info) ,urcx ,y) `(set! ,(make-live-info) ,z (asm ,info ,(asm-shiftop op) ,x ,urcx)))))))) [(op (z mem) (x z) (y imm32)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-shiftop op) ,x ,y))] ;; NB: need to return in these cases? [(op (z mem) (x z) (y ur mem imm)) (go info op z x y)] [(op (z ur) (x z) (y imm32)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-shiftop op) ,x ,y))] [(op (z ur) (x z) (y ur mem imm)) (go info op z x y)] [(op (z ur) (x ur mem imm32) (y imm32)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) `(set! ,(make-live-info) ,t (asm ,info ,(asm-shiftop op) ,t ,y)) `(set! ,(make-live-info) ,z ,t)))] [(op (z ur) (x ur mem imm32) (y ur mem imm)) (let ([t (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,t ,x) (go info op t t y) `(set! ,(make-live-info) ,z ,t)))]) (define-instruction value move [(op (z mem) (x ur imm32)) `(set! ,(make-live-info) ,z ,x)] [(op (z ur) (x ur mem imm)) ; NOTE: risc arch's will need to deal with limitations on imm `(set! ,(make-live-info) ,z ,x)]) (define-instruction value lea1 [(op (z ur) (x ur)) (let ([offset (info-lea-offset info)]) (if (signed-32? offset) `(set! ,(make-live-info) ,z (asm ,info ,(asm-lea1 (info-lea-offset info)) ,x)) (let ([u (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,u (immediate ,offset)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-lea2 0) ,x ,u))))))]) (define-instruction value lea2 [(op (z ur) (x ur) (y ur)) (let ([offset (info-lea-offset info)]) (if (signed-32? offset) `(set! ,(make-live-info) ,z (asm ,info ,(asm-lea2 (info-lea-offset info)) ,x ,y)) (let ([u (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,u (immediate ,offset)) `(set! ,(make-live-info) ,u (asm ,null-info ,asm-add ,u ,y)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-lea2 0) ,x ,u))))))]) (define-instruction value (sext8 sext16 sext32 zext8 zext16 zext32) [(op (z ur) (x ur mem)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-move/extend op) ,x))]) (define-instruction value (load) (definitions (define maybe-swap (lambda (info z expr) (with-output-language (L15d Effect) (if (info-load-swapped? info) (seq expr `(set! ,(make-live-info) ,z (asm ,info ,(asm-swap (info-load-type info)) ,z))) expr))))) [(op (z ur) (x ur) (y ur) (w imm32)) (maybe-swap info z `(set! ,(make-live-info) ,z (asm ,info ,(asm-load (info-load-type info)) ,x ,y ,w)))] [(op (z ur) (x ur) (y ur) (w ur)) (maybe-swap info z (if (eq? y %zero) `(set! ,(make-live-info) ,z (asm ,info ,(asm-load (info-load-type info)) ,x ,w (immediate 0))) (let ([u (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-lea2 0) ,y ,w)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-load (info-load-type info)) ,x ,u (immediate 0)))))))]) (define-instruction effect (store) (definitions (define maybe-swap (lambda (info w k) (with-output-language (L15d Effect) (if (info-load-swapped? info) (let ([u (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,u ,w) `(set! ,(make-live-info) ,u (asm ,info ,(asm-swap (info-load-type info)) ,u)) (k u))) (k w)))))) [(op (x ur) (y ur) (z imm32) (w ur real-imm32)) (maybe-swap info w (lambda (w) `(asm ,info ,(asm-store (info-load-type info)) ,x ,y ,z ,w)))] [(op (x ur) (y ur) (z ur) (w ur real-imm32)) (maybe-swap info w (lambda (w) (if (eq? y %zero) `(asm ,info ,(asm-store (info-load-type info)) ,x ,z (immediate 0) ,w) (let ([u (make-tmp 'u)]) (seq `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-lea2 0) ,y ,z)) `(asm ,info ,(asm-store (info-load-type info)) ,x ,u (immediate 0) ,w))))))]) (define-instruction effect (load-single->double load-double->single) [(op (x ur) (y ur) (z imm32)) `(asm ,info ,(asm-fl-cvt op (info-loadfl-flreg info)) ,x ,y ,z)]) (define-instruction effect (store-single->double) [(op (x ur) (y ur) (z imm32)) `(asm ,info ,(asm-store-single->double (info-loadfl-flreg info)) ,x ,y ,z)]) (define-instruction effect (store-single store-double) [(op (x ur) (y ur) (z imm32)) `(asm ,info ,(asm-fl-store op (info-loadfl-flreg info)) ,x ,y ,z)]) (define-instruction effect (load-double load-single) [(op (x ur) (y ur) (z imm32)) `(asm ,info ,(asm-fl-load op (info-loadfl-flreg info)) ,x ,y ,z)]) (define-instruction value (get-double) [(op (z ur)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-get-double (info-loadfl-flreg info))))]) (define-instruction effect (flt) [(op (x mem ur) (y ur)) `(asm ,info ,asm-flt ,x ,y)]) (define-instruction effect (fl+ fl- fl/ fl*) [(op (x ur) (y ur) (z ur)) `(asm ,info ,(asm-flop-2 op) ,x ,y ,z)]) (define-instruction effect (flsqrt) [(op (x ur) (y ur)) `(asm ,info ,asm-flsqrt ,x ,y)]) (define-instruction effect inc-cc-counter [(op (x ur) (y imm32 ur) (z imm32 ur)) `(asm ,info ,asm-inc-cc-counter ,x ,y ,z)]) (define-instruction effect inc-profile-counter [(op (x ur mem) (y imm32 ur)) `(asm ,info ,asm-inc-profile-counter ,x ,y)]) (define-instruction value (trunc) [(op (z ur) (x ur)) `(set! ,(make-live-info) ,z (asm ,info ,asm-trunc ,x))]) (define-instruction value get-tc [(op (z ur)) ; always used with %rax as the lhs. we take advantage of this to use z also ; as the jump temp (rax is assumed by linker's x86_64_set_jump) (safe-assert (eq? z %rax)) `(set! ,(make-live-info) ,z (asm ,info ,asm-get-tc))]) (define-instruction value activate-thread [(op (z ur)) (safe-assert (eq? z %rax)) ; see get-tc `(set! ,(make-live-info) ,z (asm ,info ,asm-activate-thread))]) (define-instruction effect deactivate-thread [(op) `(asm ,info ,asm-deactivate-thread)]) (define-instruction effect unactivate-thread [(op (x ur)) (safe-assert (eq? x %Carg1)) `(asm ,info ,asm-unactivate-thread ,x)]) ; TODO: risc architectures will have to take info-asmlib-save-ra? into account (define-instruction value asmlibcall [(op (z ur)) (let ([urax (make-precolored-unspillable 'urax %rax)]) ; rax is assumed by linker's x86_64_set_jump (seq `(set! ,(make-live-info) ,urax (asm ,null-info ,asm-kill)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-library-call (info-asmlib-libspec info)) ,urax ,(info-kill*-live*-live* info) ...))))]) (define-instruction effect asmlibcall! [(op) (let ([urax (make-precolored-unspillable 'urax %rax)]) ; rax is assumed by linker's x86_64_set_jump (seq `(set! ,(make-live-info) ,urax (asm ,null-info ,asm-kill)) `(asm ,info ,(asm-library-call (info-asmlib-libspec info)) ,urax ,(info-kill*-live*-live* info) ...)))]) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore (define-instruction effect (c-simple-call) [(op) (let ([urax (make-precolored-unspillable 'urax %rax)]) ; rax is assumed by linker's x86_64_set_jump (seq `(set! ,(make-live-info) ,urax (asm ,null-info ,asm-kill)) `(asm ,info ,(asm-c-simple-call (info-c-simple-call-entry info)) ,urax)))]) (define-instruction value pop [(op (z ur)) `(set! ,(make-live-info) ,z (asm ,info ,asm-pop))]) (define-instruction pred (fl= fl< fl<=) [(op (x ur) (y ur)) (let ([info (make-info-condition-code op #t #f)]) ; NB: reversed? flag is assumed to be #t (values '() `(asm ,info ,(asm-fl-relop info) ,x ,y)))]) (define-instruction pred (eq? u< < > <= >=) ; the idea (following from the intel x86/x86_64 documentation) ; is that we want to squeeze this into a CMP that allows one of ; the following formats: ; CMP r/m, imm ; CMP r/m, r ; CMP r, r/m ; the last format we may want to drop, since it uses a different ; format from the one above it, but is interchangeable with it, ; if we reverse the operands. [(op (x mem) (y ur imm32)) (let ([info (make-info-condition-code op #f #t)]) (values '() `(asm ,info ,(asm-relop info) ,x ,y)))] [(op (x ur) (y mem)) (let ([info (make-info-condition-code op #t #t)]) (values '() `(asm ,info ,(asm-relop info) ,y ,x)))] [(op (x imm32) (y ur mem)) (let ([info (make-info-condition-code op #t #t)]) (values '() `(asm ,info ,(asm-relop info) ,y ,x)))] [(op (x ur) (y ur imm32)) (let ([info (make-info-condition-code op #f #t)]) (values '() `(asm ,info ,(asm-relop info) ,x ,y)))]) (define-instruction pred (condition-code) [(op) (values '() `(asm ,info ,(asm-condition-code info)))]) (let* ([info-cc-eq (make-info-condition-code 'eq? #f #t)] [asm-eq (asm-relop info-cc-eq)]) (define-instruction pred (type-check?) [(op (x ur mem) (mask imm32 ur) (type imm32 ur)) (let ([tmp (make-tmp 'u)]) (values (with-output-language (L15d Effect) (seq `(set! ,(make-live-info) ,tmp ,x) `(set! ,(make-live-info) ,tmp (asm ,null-info ,asm-logand ,tmp ,mask)))) `(asm ,info-cc-eq ,asm-eq ,tmp ,type)))]) (define-instruction pred (logtest log!test) [(op (x mem) (y ur imm32)) (values '() `(asm ,info-cc-eq ,(asm-logtest (eq? op 'log!test) info-cc-eq) ,x ,y))] [(op (x ur imm32) (y mem)) (values '() `(asm ,info-cc-eq ,(asm-logtest (eq? op 'log!test) info-cc-eq) ,y ,x))] [(op (x imm32) (y ur)) (values '() `(asm ,info-cc-eq ,(asm-logtest (eq? op 'log!test) info-cc-eq) ,y ,x))] [(op (x ur) (y ur imm32)) (values '() `(asm ,info-cc-eq ,(asm-logtest (eq? op 'log!test) info-cc-eq) ,x ,y))]) (define-instruction pred (lock!) [(op (x ur) (y ur) (w imm32)) (let ([uts (make-precolored-unspillable 'uts %ts)]) (values (nanopass-case (L15d Triv) w [(immediate ,imm) (with-output-language (L15d Effect) (seq `(set! ,(make-live-info) ,uts (immediate 1)) `(set! ,(make-live-info) ,uts (asm ,info ,asm-exchange ,uts (mref ,x ,y ,imm)))))]) `(asm ,info-cc-eq ,asm-eq ,uts (immediate 0))))])) (define-instruction effect (locked-incr!) [(op (x ur) (y ur) (w imm32)) `(asm ,info ,asm-locked-incr ,x ,y ,w)]) (define-instruction effect (locked-decr!) [(op (x ur) (y ur) (w imm32)) `(asm ,info ,asm-locked-decr ,x ,y ,w)]) (define-instruction effect (cas) [(op (x ur) (y ur) (w imm32) (old ur) (new ur)) (let ([urax (make-precolored-unspillable 'urax %rax)]) (with-output-language (L15d Effect) (seq `(set! ,(make-live-info) ,urax ,old) ;; NB: may modify %rax: `(asm ,info ,asm-locked-cmpxchg ,x ,y ,w ,urax ,new))))]) (define-instruction effect (pause) [(op) `(asm ,info ,asm-pause)]) (define-instruction value read-performance-monitoring-counter [(op (z ur) (x ur mem imm)) (safe-assert (eq? z %rax)) (safe-assert (and (info-kill*? info) (memq %rdx (info-kill*-kill* info)))) (let ([urcx (make-precolored-unspillable 'urcx %rcx)]) (seq `(set! ,(make-live-info) ,urcx ,x) `(set! ,(make-live-info) ,z (asm ,info ,asm-read-performance-monitoring-counter ,urcx))))]) (define-instruction value read-time-stamp-counter [(op (z ur)) (safe-assert (eq? z %rax)) (safe-assert (and (info-kill*? info) (memq %rdx (info-kill*-kill* info)))) `(set! ,(make-live-info) ,z (asm ,info ,asm-read-time-stamp-counter))]) ; NB: shouldn't need to list (info-kill*-live*-live* info) ... here, since we've already ; NB: computed spillable/register live sets (define-instruction effect (c-call) [(op (x ur mem)) `(asm ,info ,asm-indirect-call ,x ,(info-kill*-live*-live* info) ...)]) (define-instruction effect (push) [(op (x ur)) `(asm ,info ,asm-push ,x)]) (define-instruction effect save-flrv [(op) `(asm ,info ,asm-save-flrv)]) (define-instruction effect restore-flrv [(op) `(asm ,info ,asm-restore-flrv)]) (define-instruction effect invoke-prelude ; align sp on 16-byte boundary, taking into account 8-byte ; return address already pushed by caller [(op) (seq `(set! ,(make-live-info) ,%sp (asm ,info ,asm-sub ,%sp (immediate 8))) `(set! ,(make-live-info) ,%tc ,%Carg1))]) ) ;;; SECTION 3: assembler (module asm-module (; required exports asm-move asm-move/extend asm-load asm-store asm-swap asm-library-call asm-library-jump asm-mul asm-muli asm-addop asm-add asm-sub asm-negate asm-sub-negate asm-pop asm-shiftop asm-sll asm-logand asm-lognot asm-logtest asm-fl-relop asm-relop asm-push asm-indirect-jump asm-literal-jump asm-direct-jump asm-return-address asm-jump asm-conditional-jump asm-data-label asm-rp-header asm-lea1 asm-lea2 asm-indirect-call asm-condition-code asm-fl-cvt asm-fl-store asm-fl-load asm-flt asm-trunc asm-div asm-exchange asm-pause asm-locked-incr asm-locked-decr asm-locked-cmpxchg asm-flop-2 asm-flsqrt asm-c-simple-call asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size asm-enter asm-foreign-call asm-foreign-callable asm-inc-profile-counter asm-inc-cc-counter asm-read-time-stamp-counter asm-read-performance-monitoring-counter ; threaded version specific asm-get-tc asm-activate-thread asm-deactivate-thread asm-unactivate-thread ; machine dependent exports asm-sext-rax->rdx asm-store-single->double asm-kill asm-get-double) (define ax-register? (case-lambda [(x) (record-case x [(reg) r #t] [else #f])] [(x reg) (record-case x [(reg) r (eq? r reg)] [else #f])])) (define ax-ea-reg-code (lambda (ea) (record-case ea [(reg) r (reg-mdinfo r)] [else (sorry! 'ax-ea-reg-code "ea=~s" ea)]))) (define ax-imm-data (lambda (ea) (record-case ea [(imm) (n) n] [else ($oops 'assembler-internal "ax-imm-data ea=~s" ea)]))) ; define-op sets up assembly op macros-- ; suffixes are a sub-list of (b w l 1)-- ; the opcode, the size (byte word, long, quad), and all other expressions ; are passed to the specified handler-- ; for prefix 'p' and each suffix 's' a macro of the form 'ps' is set up-- ; if no suffix is specified the prefix is defined as a macro (define-syntax define-op (lambda (x) (syntax-case x () [(k prefix (suffix ...) handler e ...) (let ([suffix* (datum (suffix ...))]) (unless (andmap (lambda (x) (memq x '(b w l *))) suffix*) (syntax-error x (format "invalid suffix list ~s" suffix*))) (with-syntax ([(op ...) (map (lambda (x) (if (eq? x '*) (construct-name #'k "asmop-" #'prefix) (construct-name #'k "asmop-" #'prefix x))) suffix*)] [(size ...) (map (lambda (x) (case x [(b) #'byte] [(w) #'word] [(l) #'long] [(*) #'quad])) suffix*)]) #'(begin (define-syntax op (syntax-rules () [(_ mneu arg (... ...)) (handler 'mneu 'size e ... arg (... ...))])) ...)))] [(k op handler e ...) (with-syntax ([op (construct-name #'k "asmop-" #'op)]) #'(define-syntax op (syntax-rules () [(_ mneu arg (... ...)) (handler 'mneu e ... arg (... ...))])))]))) (define-syntax emit (lambda (x) (syntax-case x () [(k op x ...) (with-syntax ([emit-op (construct-name #'k "asmop-" #'op)]) #'(emit-op op x ...))]))) ;;; note that the assembler isn't clever--you must be very explicit about ;;; which flavor you want, and there are a few new varieties introduced ;;; (commented-out opcodes are not currently used by the assembler-- ;;; spaces are left to indicate possible size extensions) (define-op asl (*) unary-op #b1101001 #b100) ; shifts by CL (define-op lsr (*) unary-op #b1101001 #b101) ; shifts by CL (define-op asr (*) unary-op #b1101001 #b111) ; shifts by CL (define-op asli (*) shifti-op #b1100000 #b100) (define-op lsri (*) shifti-op #b1100000 #b101) (define-op asri (*) shifti-op #b1100000 #b111) (define-op addi (#;b *) addi-op #b100000 #b000) (define-op subi (#;b *) addi-op #b100000 #b101) (define-op cmpi (b *) addi-op #b100000 #b111) (define-op ori (#;b *) logi-op #b001) (define-op andi (b *) logi-op #b100) (define-op xori (#;b *) logi-op #b110) (define-op testi (b *) testi-op #b1111011 #b000) (define-op movi (b w l *) movi-op #b000) (define-op mov (b w l *) binary-op #b100010) (define-op movsb mulq-op #b10111110) (define-op movsw mulq-op #b10111111) (define-op movsl quad-op #b01100011) (define-op movzb mulq-op #b10110110) (define-op movzw mulq-op #b10110111) (define-op add (#;b *) binary-op #b000000) (define-op or (#;b *) binary-op #b000010) (define-op and (#;b *) binary-op #b001000) (define-op sub (#;b *) binary-op #b001010) (define-op xor (#;b *) binary-op #b001100) (define-op test (#;b *) test-op #b1000010) (define-op cmp (#;b *) binary-op #b001110) (define-op xchg (#;b *) xchg-op #b1000011) (define-op bswap (#;l *) bswap-op) (define-op divsax (*) unary-op #b1111011 #b111) #;(define-op mulsax (*) unary-op #b1111011 #b100) (define-op muls mulq-op #b10101111) (define-op mulsi muliq-op #b01101001) (define-op lea quad-op #b10001101) (define-op pop push-op #b01011) (define-op push push-op #b01010) (define-op pushi pushi-op) (define-op pushf byte-op #b10011100) (define-op popf byte-op #b10011101) #;(define-op nop byte-op #b10010000) (define-op ret byte-op #b11000011) (define-op extad quad-byte-op #b10011001) ; extend rax to rdx (define-op rdtsc two-byte-op #b1111 #b00110001) ; read time-stamp counter (define-op rdpmc two-byte-op #b1111 #b00110011) ; read performance monitoring counter (define-op pause two-byte-op #b11110011 #b10010000) ; equivalent to rep nop (define-op dec (#;b *) unary-op #b1111111 #b001) (define-op inc (#;b *) unary-op #b1111111 #b000) (define-op neg (b *) unary-op #b1111011 #b011) ; was commented out in x86_64macros (define-op not (#;b *) unary-op #b1111011 #b010) (define-op locked-dec (#;b *) locked-unary-op #b1111111 #b001) (define-op locked-inc (#;b *) locked-unary-op #b1111111 #b000) (define-op locked-cmpxchg (*) locked-cmpxchg-op) ; also do inc-reg dec-reg ; the following are forms of the call instruction and push the return address (define-op call jump-op #b010) ; reg/mem indirect #;(define-op bsrl branch-always-op long #b11101000) ; pc-relative (define-op bsr bsr-op) ; the following are forms of the jmp instruction (define-op jmp jump-op #b100) ; reg/mem indirect (define-op bra bra-op) (define-op bvs branch-op #b0000) ; jump on overflow (define-op bvc branch-op #b0001) ; jump on not overflow (define-op bcs branch-op #b0010) ; jump on below (carry set) (define-op bcc branch-op #b0011) ; jump on not below (carry clear) (define-op beq branch-op #b0100) ; jump on equal (define-op bne branch-op #b0101) ; jump on not equal (define-op bls branch-op #b0110) ; jump on less or same (below or equal) (define-op bhi branch-op #b0111) ; jump on higher (above) (define-op blt branch-op #b1100) ; jump on less than (define-op bge branch-op #b1101) ; jump on greater than or equal (define-op ble branch-op #b1110) ; jump on less than or equal (define-op bgt branch-op #b1111) ; jump on greater than ; SSE2 instructions (define-op sse.addsd sse-op1 #xF2 #x58 0) (define-op sse.andpd sse-op1 #x66 #x54 0) (define-op sse.cvtss2sd sse-op1 #xF3 #x5A 0) (define-op sse.cvtsd2ss sse-op1 #xF2 #x5A 0) (define-op sse.cvttsd2si sse-op1 #xF2 #x2C 1) (define-op sse.cvtsi2sd sse-op1 #xF2 #x2A 1) (define-op sse.divsd sse-op1 #xF2 #x5E 0) (define-op sse.movd sse-op2 #x66 #x6E #x7E 1) (define-op sse.movsd sse-op2 #xF2 #x10 #x11 0) (define-op sse.movss sse-op2 #xF3 #x10 #x11 0) (define-op sse.mulsd sse-op1 #xF2 #x59 0) (define-op sse.sqrtsd sse-op1 #xF2 #x51 0) (define-op sse.subsd sse-op1 #xF2 #x5C 0) (define-op sse.ucomisd sse-op1 #x66 #x2E 0) (define-op sse.xorpd sse-op1 #x66 #x57 0) (define sse-op1 (lambda (op prefix-code op-code w source dest-reg code*) (emit-code (op source dest-reg code*) (build byte prefix-code) (ax-ea-rex w source dest-reg #f) (build byte #x0F) (build byte op-code) (ax-ea-modrm-reg source dest-reg) (ax-ea-sib source) (ax-ea-addr-disp source)))) (define sse-op2 (lambda (op prefix-code dstreg-op-code srcreg-op-code w source dest code*) (cond [(ax-register? source) (emit-code (op source dest code*) (build byte prefix-code) (ax-ea-rex w dest source #f) (build byte #x0F) (build byte srcreg-op-code) (ax-ea-modrm-reg dest source) (ax-ea-sib dest) (ax-ea-addr-disp dest))] [(ax-register? dest) (emit-code (op source dest code*) (build byte prefix-code) (ax-ea-rex w source dest #f) (build byte #x0F) (build byte dstreg-op-code) (ax-ea-modrm-reg source dest) (ax-ea-sib source) (ax-ea-addr-disp source))] [else ($oops 'assembler-internal "sse-op2 source=~s dest=~s" source dest)]))) (define mulq-op ; used for movs/movz as well as mulsq (lambda (op op-code source-ea dest-reg code*) (emit-code (op source-ea dest-reg code*) (ax-ea-rex 1 source-ea dest-reg 'quad) (build byte #b00001111) (build byte op-code) (ax-ea-modrm-reg source-ea dest-reg) (ax-ea-sib source-ea) (ax-ea-addr-disp source-ea)))) (define muliq-op (lambda (op op-code imm-data source-ea dest-reg code*) (emit-code (op imm-data source-ea dest-reg code*) (ax-ea-rex 1 source-ea dest-reg 'quad) (build byte op-code) (ax-ea-modrm-reg source-ea dest-reg) (ax-ea-sib source-ea) (ax-ea-addr-disp source-ea) (build long (ax-imm-data imm-data))))) (define quad-op (lambda (op op-code source-ea dest-reg code*) (emit-code (op source-ea dest-reg code*) (ax-ea-rex 1 source-ea dest-reg 'quad) (build byte op-code) (ax-ea-modrm-reg source-ea dest-reg) (ax-ea-sib source-ea) (ax-ea-addr-disp source-ea)))) (define test-op (lambda (op size op-code source-ea reg code*) (emit-code (op source-ea reg code*) (ax-ea-rex (if (eq? size 'quad) 1 0) source-ea reg size) (build byte (byte-fields [1 op-code] [0 (ax-size-code size)])) (ax-ea-modrm-reg source-ea reg) (ax-ea-sib source-ea) (ax-ea-addr-disp source-ea)))) (define unary-op (lambda (op size op-code ttt-code dest-ea code*) (emit-code (op dest-ea code*) (ax-ea-rex (if (eq? size 'quad) 1 0) dest-ea #f size) (build byte (byte-fields [1 op-code] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea)))) (define locked-unary-op (lambda (op size op-code ttt-code dest-ea code*) (emit-code (op dest-ea code*) (build byte #xf0) ; lock prefix (ax-ea-rex (if (eq? size 'quad) 1 0) dest-ea #f size) (build byte (byte-fields [1 op-code] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea)))) (define locked-cmpxchg-op (lambda (op size dest-ea new-reg code*) (begin (emit-code (op dest-ea new-reg code*) (build byte #xf0) ; lock prefix (ax-ea-rex (if (eq? size 'quad) 1 0) dest-ea new-reg size) (build byte #x0f) (build byte (byte-fields [1 #b1011000] [0 (ax-size-code size)])) (ax-ea-modrm-reg dest-ea new-reg) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea))))) (define pushi-op (lambda (op imm-ea code*) (if (ax-range? -128 imm-ea 127) (emit-code (op imm-ea code*) (build byte 106) (ax-ea-imm-data 'byte imm-ea)) (emit-code (op imm-ea code*) (build byte 104) (ax-ea-imm-data 'long imm-ea))))) ; imm-data can be either an (imm n) or else a (literal size addr) record. ; (define testi-op (lambda (op size op-code ttt-code imm-ea dest-ea code*) (emit-code (op imm-ea dest-ea code*) (ax-ea-rex (if (eq? size 'quad) 1 0) dest-ea #f size) (build byte (byte-fields [1 op-code] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea) (ax-ea-imm-data size imm-ea)))) (define logi-op (lambda (op size ttt-code imm-ea dest-ea code*) (if (and (eq? size 'quad) (record-case imm-ea [(imm) (n) (<= -128 n 127)] [else #f])) (emit-code (op imm-ea dest-ea code*) (ax-ea-rex 1 dest-ea #f size) (build byte (byte-fields [1 #b1000001] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea) (ax-ea-imm-data 'byte imm-ea)) (emit-code (op imm-ea dest-ea code*) (ax-ea-rex 1 dest-ea #f size) (build byte (byte-fields [1 #b1000000] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea) (ax-ea-imm-data size imm-ea))))) (define addi-op (lambda (op size op-code ttt-code imm-ea dest-ea code*) (if (and (eq? size 'quad) (record-case imm-ea [(imm) (n) (<= -128 n 127)] [else #f])) (emit-code (op imm-ea dest-ea code*) (ax-ea-rex 1 dest-ea #f size) (build byte (byte-fields [2 op-code] [1 1] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea) (ax-ea-imm-data 'byte imm-ea)) (emit-code (op imm-ea dest-ea code*) (ax-ea-rex (if (eq? size 'quad) 1 0) dest-ea #f size) (build byte (byte-fields [2 op-code] [1 0] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea) (ax-ea-imm-data size imm-ea))))) (define movi-op (lambda (op size ttt-code imm-ea dest-ea code*) (cond [(and (eq? size 'quad) (record-case dest-ea [(reg) stuff #t] [else #f]) (not (ax-range? #x-80000000 imm-ea #x7fffffff))) (emit-code (op imm-ea dest-ea code*) (ax-ea-rex 1 dest-ea #f 'quad) (build byte (byte-fields [4 #b1011] [3 (ax-size-code size)] [0 (fxlogand (ax-ea-reg-code dest-ea) 7)])) (ax-ea-imm-data 'full-quad imm-ea))] [else (emit-code (op imm-ea dest-ea code*) (and (eq? size 'word) (build byte 102)) (ax-ea-rex (if (eq? size 'quad) 1 0) dest-ea #f size) (build byte (byte-fields [1 #b1100011] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea) (ax-ea-imm-data size imm-ea))]))) ;;; always need byte immediate data for shift ops (define shifti-op (lambda (op size op-code ttt-code imm-ea dest-ea code*) (emit-code (op imm-ea dest-ea code*) (ax-ea-rex (if (eq? size 'quad) 1 0) dest-ea #f size) (build byte (byte-fields [1 op-code] [0 (ax-size-code size)])) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea) (ax-ea-imm-data 'byte imm-ea)))) (define binary-op (lambda (op size op-code source dest code*) (cond [(ax-register? source) (emit-code (op source dest code*) (and (eq? size 'word) (build byte 102)) (ax-ea-rex (if (eq? size 'quad) 1 0) dest source size) (build byte (byte-fields [2 op-code] [1 0] [0 (ax-size-code size)])) (ax-ea-modrm-reg dest source) (ax-ea-sib dest) (ax-ea-addr-disp dest))] [(ax-register? dest) (emit-code (op source dest code*) (and (eq? size 'word) (build byte 102)) (ax-ea-rex (if (eq? size 'quad) 1 0) source dest size) (build byte (byte-fields [2 op-code] [1 1] [0 (ax-size-code size)])) (ax-ea-modrm-reg source dest) (ax-ea-sib source) (ax-ea-addr-disp source))] [else ($oops 'assembler-internal "binary-op source=~s dest=~s" source dest)]))) (define xchg-op (lambda (op size op-code source dest code*) (cond [(ax-register? source) (emit-code (op source dest code*) (ax-ea-rex (if (eq? size 'quad) 1 0) dest source size) (build byte (byte-fields [1 op-code] [0 (ax-size-code size)])) (ax-ea-modrm-reg dest source) (ax-ea-sib dest) (ax-ea-addr-disp dest))] [(ax-register? dest) (emit-code (op source dest code*) (ax-ea-rex (if (eq? size 'quad) 1 0) source dest size) (build byte (byte-fields [1 op-code] [0 (ax-size-code size)])) (ax-ea-modrm-reg source dest) (ax-ea-sib source) (ax-ea-addr-disp source))] [else ($oops 'assembler-internal "xchg-op source=~s dest=~s" source dest)]))) (define branch-op (lambda (op condition-code disp code*) (record-case disp [(label) (offset l) (if (and (fixnum? offset) (fx<= -128 offset 127)) (emit-code (op disp code*) (build byte (byte-fields [4 7] [0 condition-code])) (build byte offset)) (emit-code (op disp code*) (build byte 15) (build byte (byte-fields [4 8] [0 condition-code])) (build long offset)))] [else (emit-code (op disp code*) (build byte 15) (build byte (byte-fields [4 8] [0 condition-code])) (ax-ea-branch-disp disp))]))) (define jump-op (lambda (op ttt-code dest-ea code*) (emit-code (op dest-ea code*) (ax-ea-rex 0 dest-ea #f 'quad) (build byte 255) (ax-ea-modrm-ttt dest-ea ttt-code) (ax-ea-sib dest-ea) (ax-ea-addr-disp dest-ea)))) (define bra-op (lambda (op disp code*) (record-case disp [(label) (offset l) (if (and (fixnum? offset) (fx<= -128 offset 127)) (emit-code (op disp code*) (build byte #b11101011) (build byte offset)) (emit-code (op disp code*) (build byte #b11101001) (build long offset)))] [else (emit-code (op disp code*) (build byte #b11101001) (ax-ea-branch-disp disp))]))) ;; TODO: not useful on x86_64? (define bsr-op (lambda (op disp code*) (emit-code (op disp code*) (build byte #b11101000) (if (pair? disp) (ax-ea-branch-disp disp) (build long disp))))) (define byte-op (lambda (op op-code code*) (emit-code (op code*) (build byte op-code)))) (define two-byte-op (lambda (op op-code1 op-code2 code*) (emit-code (op code*) (build byte op-code1) (build byte op-code2)))) (define bswap-op (lambda (op size reg code*) (begin (unless (ax-register? reg) ($oops 'assembler-internal "(bswap-op) ~s is not a real register" reg)) (emit-code (op reg code*) (ax-ea-rex (if (eq? size 'quad) 1 0) reg #f size) (build byte #b00001111) (build byte (byte-fields [3 #b11001] [0 (fxlogand (ax-ea-reg-code reg) 7)])))))) (define quad-byte-op (lambda (op op-code code*) (emit-code (op code*) (build byte #x48) ; rex prefix w/rex.w bit set (build byte op-code)))) (define push-op (lambda (op op-code reg code*) (begin (unless (ax-register? reg) ($oops 'assembler-internal "(push-op) ~s is not a real register" reg)) (emit-code (op reg code*) (ax-ea-rex 0 reg #f 'quad) (build byte (byte-fields [3 op-code] [0 (fxlogand (ax-ea-reg-code reg) 7)])))))) (define-syntax emit-code (lambda (x) (define build-maybe-cons* (lambda (e* e-ls) (if (null? e*) e-ls #`(let ([t #,(car e*)] [ls #,(build-maybe-cons* (cdr e*) e-ls)]) (if t (cons t ls) ls))))) (syntax-case x () [(_ (op opnd ... ?code*) chunk ...) (build-maybe-cons* #'(chunk ...) #'(aop-cons* `(asm ,op ,opnd ...) ?code*))]))) (define-who ax-size-code (lambda (x) (case x [(byte) 0] [(word) 1] [(long) 1] [(quad) 1] [else (sorry! who "invalid size ~s" x)]))) (define-syntax build (syntax-rules () [(_ x e) (and (memq (datum x) '(byte word long quad)) (integer? (datum e))) (quote (x . e))] [(_ x e) (memq (datum x) '(byte word long quad)) (cons 'x e)])) (define-syntax byte-fields (syntax-rules () [(byte-fields (n e) ...) (andmap fixnum? (datum (n ...))) (fx+ (fxsll e n) ...)])) (define ax-ea-rex (lambda (w ea maybe-reg size) (define (rex-required? x) ; rex prefix is required to access lsb of RSP, RBP, RSI, RDI ; (and R8-R15, but they require rex prefix anyway) (and (eq? size 'byte) (record-case x [(reg) r (fx<= 4 (reg-mdinfo r) 7)] [else #f]))) (define (rex.r) (if maybe-reg (record-case maybe-reg [(reg) r (fxsrl (reg-mdinfo r) 3)] [else ($oops 'assembler-internal "maybe-reg=~s" maybe-reg)]) 0)) (define (build-rex x b) (let ([b (build byte (byte-fields [4 #b0100] [3 w] [2 (rex.r)] [1 x] [0 b]))]) (and (or (not (fx= (cdr b) #x40)) (rex-required? ea) (and maybe-reg (rex-required? maybe-reg))) b))) (record-case ea [(index) (size index-reg base-reg) (build-rex (fxsrl (reg-mdinfo index-reg) 3) (fxsrl (reg-mdinfo base-reg) 3))] [(riprel) stuff (build-rex 0 0)] [(disp) (size reg) (build-rex 0 (fxsrl (reg-mdinfo reg) 3))] [(reg) r (build-rex 0 (fxsrl (reg-mdinfo r) 3))] [else ($oops 'assembler-internal "ax-ea-rex ea ~s" ea)]))) (define ax-ea-addr-disp (lambda (dest-ea) (record-case dest-ea [(index) (size index-reg base-reg) (cond [(and (fxzero? size) (not (eq? base-reg %rbp)) (not (eq? base-reg %r13))) #f] [(ax-byte-size? size) (build byte size)] [else (build long size)])] [(riprel) (disp) (build long disp)] [(disp) (size reg) (cond [(and (fxzero? size) (not (eq? reg %rbp)) (not (eq? reg %r13))) #f] ; indirect [(ax-byte-size? size) (build byte size)] [else (build long size)])] [(reg) r #f] [else ($oops 'assembler-internal "ax-ea-addr-disp dest-ea=~s" dest-ea)]))) (define ax-ea-sib (let ([ax-ss-index-base (lambda (index-reg base-reg) (build byte (byte-fields [6 #b00] ; 2 bits, scaled by bytes. [3 index-reg] ; 3 bits, index register. [0 base-reg])))]) ; 3 bits, base register. (lambda (dest-ea) (record-case dest-ea [(index) (size index-reg base-reg) (ax-ss-index-base (fxlogand (reg-mdinfo index-reg) 7) (fxlogand (reg-mdinfo base-reg) 7))] [(riprel) stuff #f] [(disp) (size reg) (and (or (eq? reg %sp) (eq? reg %r12)) (ax-ss-index-base #b100 #b100))] [(reg) r #f] [else ($oops 'assembler-internal "ax-ea-sib dest-ea=~s" dest-ea)])))) (define ax-ea-modrm-reg (lambda (dest-ea reg) (ax-ea-modrm-ttt dest-ea (fxlogand (ax-ea-reg-code reg) 7)))) (define ax-ea-modrm-ttt (letrec ([ax-mod-ttt-r/m (lambda (mod ttt r/m) (build byte (byte-fields [6 mod] ; 2 bits [3 ttt] ; 3 bits [0 r/m])))] ; 3 bits [ax-r/m ; 3 bits (lambda (dest-ea) (record-case dest-ea [(index) (size index-reg base-reg) #b100] [(riprel) stuff #b101] [(disp) (size reg) (fxlogand (reg-mdinfo reg) 7)] [(reg) r (fxlogand (reg-mdinfo r) 7)] [else ($oops 'assembler-internal "ax-r/m dest-ea=~s" dest-ea)]))] [ax-mod ; 2 bits (lambda (dest-ea) (record-case dest-ea [(index) (size index-reg base-reg) (cond [(and (fxzero? size) (not (eq? base-reg %rbp)) (not (eq? base-reg %r13))) #b00] [(ax-byte-size? size) #b01] [else #b10])] [(riprel) stuff #b00] [(disp) (size reg) (cond [(and (fxzero? size) (not (eq? reg %rbp)) (not (eq? reg %r13))) #b00] ; indirect [(ax-byte-size? size) #b01] [else #b10])] [(reg) r #b11] [else ($oops 'assembler-internal "ax-mod dest-ea ~s" dest-ea)]))]) (lambda (dest-ea ttt) (ax-mod-ttt-r/m (ax-mod dest-ea) ttt (ax-r/m dest-ea))))) (define ax-ea-imm-data (lambda (size imm-data) (case size [(full-quad) (record-case imm-data [(literal) stuff (cons 'abs stuff)] [(funcrel) stuff (cons 'funcrel (ax-ea-imm-data 'quad stuff))] ; added, not sure if this works for x86_64 [(imm) (n) (cons 'quad n)] [else ($oops 'assembler-internal "ax-ea-imm-data size=~s imm-data=~s" size imm-data)])] [else (record-case imm-data [(imm) (n) (cons (if (eq? size 'quad) 'long size) n)] [else ($oops 'assembler-internal "ax-ea-imm-data size=~s imm-data=~s" size imm-data)])]))) (define ax-byte-size? (lambda (n) (<= -128 n 127))) (define ax-range? (lambda (low x high) (record-case x [(imm) (n) (<= low n high)] [else #f]))) (define ax-ea-branch-disp (lambda (dest-ea) (record-case dest-ea [(literal) stuff (cons 'rel stuff)] [else ($oops 'assembler-internal "ax-ea-branch-disp dest-ea=~s" dest-ea)]))) (define asm-size (lambda (x) (case (car x) [(asm x86_64-jump x86_64-call) 0] [(byte) 1] [(word) 2] [(long) 4] [else 8]))) (define-who asm-move (lambda (code* dest src) (Trivit (dest src) (record-case src [(imm) (n) (if (and (eqv? n 0) (record-case dest [(reg) r #t] [else #f])) (emit xor dest dest code*) (emit movi src dest code*))] [(literal) stuff (emit movi src dest code*)] [else (emit mov src dest code*)])))) (define-who asm-move/extend (lambda (op) (lambda (code* dest src) (Trivit (dest src) (case op [(sext8) (emit movsb src dest code*)] [(sext16) (emit movsw src dest code*)] [(sext32) (emit movsl src dest code*)] [(zext8) (emit movzb src dest code*)] [(zext16) (emit movzw src dest code*)] [(zext32) (emit movl src dest code*)] [else (sorry! who "unexpected op ~s" op)]))))) (define asm-fl-cvt (lambda (op flreg) (lambda (code* base index offset) (let ([src (build-mem-opnd base index offset)]) (case op [(load-single->double) (emit sse.cvtss2sd src (cons 'reg flreg) code*)] [(load-double->single) (emit sse.cvtsd2ss src (cons 'reg flreg) code*)]))))) (define asm-store-single->double (lambda (flreg) (lambda (code* base index offset) (let ([dest (build-mem-opnd base index offset)] [flreg (cons 'reg flreg)]) (emit sse.cvtss2sd flreg flreg (emit sse.movsd flreg dest code*)))))) (define asm-fl-store (lambda (op flreg) (lambda (code* base index offset) (let ([dest (build-mem-opnd base index offset)]) (case op [(store-single) (emit sse.movss (cons 'reg flreg) dest code*)] [(store-double) (emit sse.movsd (cons 'reg flreg) dest code*)]))))) (define asm-fl-load (lambda (op flreg) (lambda (code* base index offset) (let ([src (build-mem-opnd base index offset)]) (case op [(load-single) (emit sse.movss src (cons 'reg flreg) code*)] [(load-double) (emit sse.movsd src (cons 'reg flreg) code*)]))))) (define asm-get-double (lambda (flreg) (lambda (code* dst) (emit sse.movd (cons 'reg flreg) (cons 'reg dst) code*)))) (define asm-flt (lambda (code* src flonumreg) (Trivit (src) (let ([dest `(disp ,(constant flonum-data-disp) ,flonumreg)] [flreg (cons 'reg %flreg1)]) (emit sse.cvtsi2sd src flreg (emit sse.movsd flreg dest code*)))))) (define asm-flop-2 (lambda (op) (lambda (code* src1 src2 dest) (let ([src1 `(disp ,(constant flonum-data-disp) ,src1)] [src2 `(disp ,(constant flonum-data-disp) ,src2)] [dest `(disp ,(constant flonum-data-disp) ,dest)]) (let ([code* (emit sse.movsd (cons 'reg %flreg1) dest code*)]) (let ([code* (case op [(fl+) (emit sse.addsd src2 (cons 'reg %flreg1) code*)] [(fl-) (emit sse.subsd src2 (cons 'reg %flreg1) code*)] [(fl*) (emit sse.mulsd src2 (cons 'reg %flreg1) code*)] [(fl/) (emit sse.divsd src2 (cons 'reg %flreg1) code*)])]) (emit sse.movsd src1 (cons 'reg %flreg1) code*))))))) (define asm-flsqrt (lambda (code* src dest) (let ([src `(disp ,(constant flonum-data-disp) ,src)] [dest `(disp ,(constant flonum-data-disp) ,dest)]) (emit sse.sqrtsd src (cons 'reg %flreg1) (emit sse.movsd (cons 'reg %flreg1) dest code*))))) (define asm-trunc (lambda (code* dest flonumreg) (Trivit (dest) (let ([src `(disp ,(constant flonum-data-disp) ,flonumreg)]) (emit sse.cvttsd2si src dest code*))))) (define asm-load (lambda (type) (lambda (code* dest base index offset) (Trivit (dest) (let ([src (build-mem-opnd base index offset)]) (case type [(integer-64 unsigned-64) (emit mov src dest code*)] [(integer-32) (emit movsl src dest code*)] [(unsigned-32) (emit movl src dest code*)] ; clears upper 32 bits of destination register [(integer-16) (emit movsw src dest code*)] [(unsigned-16) (emit movzw src dest code*)] [(integer-8) (emit movsb src dest code*)] [(unsigned-8) (emit movzb src dest code*)] [else (sorry! 'asm-load "unexpected mref type ~s" type)])))))) (define asm-store (lambda (type) (lambda (code* base index offset src) (define imm8 (lambda (n) `(imm ,(modulo n #x100)))) (define imm16 (lambda (n) `(imm ,(modulo n #x10000)))) (Trivit (src) (let ([dest (build-mem-opnd base index offset)]) (record-case src [(imm) (n) (case type [(integer-64 unsigned-64) (emit movi src dest code*)] [(integer-32 unsigned-32) (emit movil src dest code*)] [(integer-16 unsigned-16) (emit moviw (imm16 n) dest code*)] [(integer-8 unsigned-8) (emit movib (imm8 n) dest code*)] [else (sorry! 'asm-store "unexpected mset! type ~s" type)])] [(literal) stuff (case type [(integer-64 unsigned-64) (emit movi src dest code*)] [(integer-32 unsigned-32) (emit movil src dest code*)] [(integer-16 unsigned-16) (emit moviw src dest code*)] [(integer-8 unsigned-8) (emit movib src dest code*)] [else (sorry! 'asm-store "unexpected mset! type ~s" type)])] [else (case type [(integer-64 unsigned-64) (emit mov src dest code*)] [(integer-32 unsigned-32) (emit movl src dest code*)] [(integer-16 unsigned-16) (emit movw src dest code*)] [(integer-8 unsigned-8) (emit movb src dest code*)] [else (sorry! 'asm-store "unexpected mset! type ~s" type)])])))))) (define asm-swap (lambda (type) (lambda (code* dest src) (Trivit (dest) (safe-assert (equal? (Triv->rand src) dest)) (emit bswap dest (case type [(integer-16) (emit asri '(imm 48) dest code*)] [(unsigned-16) (emit lsri '(imm 48) dest code*)] [(integer-32) (emit asri '(imm 32) dest code*)] [(unsigned-32) (emit lsri '(imm 32) dest code*)] [(integer-64 unsigned-64) code*] [else ($oops 'assembler-internal "unexpected asm-swap type argument ~s" type)])))))) (define asm-mul (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (emit muls src1 dest code*)))) (define asm-div (lambda (code* dest-rax src-rax src-rdx src2) (Trivit (src2) (safe-assert (and (eq? dest-rax %rax) (eq? src-rax %rax) (eq? src-rdx %rdx))) (emit divsax src2 code*)))) (define asm-sext-rax->rdx (lambda (code* dest-rdx src-rax) (safe-assert (and (eq? dest-rdx %rdx) (eq? src-rax %rax))) (emit extad code*))) (define asm-muli (lambda (code* dest src0 src1) (Trivit (dest src0 src1) (emit mulsi src1 src0 dest code*)))) (define-who asm-addop (lambda (op) (case op [(+) asm-add] [(logand) asm-logand] [(logor) asm-logor] [(logxor) asm-logxor] [else ($oops who "unsupported op ~s" op)]))) (define asm-add (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (record-case src1 [(imm literal) stuff (emit addi src1 dest code*)] [else (emit add src1 dest code*)])))) (define asm-read-performance-monitoring-counter (lambda (code* dest src) ; rdx is an implied dest and included in info's kill list (safe-assert (eq? dest %rax)) (safe-assert (eq? src %rcx)) (emit rdpmc code*))) (define asm-read-time-stamp-counter (lambda (code* dest) ; rdx is an implied dest and included in info's kill list (safe-assert (eq? dest %rax)) (emit rdtsc code*))) (define asm-inc-profile-counter (lambda (code* dest src) (Trivit (dest src) (record-case src [(imm) (n) (if (eqv? n 1) (emit inc dest code*) (emit addi src dest code*))] [else (emit add src dest code*)])))) (define-who asm-inc-cc-counter (lambda (code* base offset val) (let ([dest (nanopass-case (L16 Triv) offset [(immediate ,imm) `(disp ,imm ,base)] [,x `(index 0 ,x ,base)] [else ($oops who "unexpected increment offset ~s" offset)])]) (nanopass-case (L16 Triv) val [(immediate ,imm) (if (fx= imm 1) (emit inc dest code*) (emit addi `(imm ,imm) dest code*))] [,x (emit add (cons 'reg x) dest code*)] [else ($oops who "unsupported increment ~s" val)])))) (define asm-sub (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (record-case src1 [(imm literal) stuff (emit subi src1 dest code*)] [else (emit sub src1 dest code*)])))) (define asm-negate (lambda (code* dest src) (Trivit (dest) (safe-assert (equal? (Triv->rand src) dest)) (emit neg dest code*)))) (define asm-sub-negate (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (let ([code* (emit neg dest code*)]) (record-case src1 [(imm literal) stuff (emit subi src1 dest code*)] [else (emit sub src1 dest code*)]))))) (define asm-pop (lambda (code* dest) (Trivit (dest) (emit pop dest code*)))) (define asm-return (lambda () (emit addi '(imm 8) (cons 'reg %sp) (emit ret '())))) (define asm-c-return (lambda (info) (emit ret '()))) (define asm-locked-incr (lambda (code* base index offset) (let ([dest (build-mem-opnd base index offset)]) (emit locked-inc dest code*)))) (define asm-locked-decr (lambda (code* base index offset) (let ([dest (build-mem-opnd base index offset)]) (emit locked-dec dest code*)))) (define asm-locked-cmpxchg (lambda (code* base index offset old-v new-v) (let ([dest (build-mem-opnd base index offset)]) (emit locked-cmpxchg dest (cons 'reg new-v) code*)))) (define asm-pause (lambda (code*) (emit pause code*))) (define asm-exchange (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (emit xchg src1 dest code*)))) (define-who asm-shiftop (lambda (op) (case op [(sll) asm-sll] [(srl) asm-srl] [(sra) asm-sra] [else ($oops who "unsupported op ~s" op)]))) (define asm-sll (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (record-case src1 [(imm literal) stuff (emit asli src1 dest code*)] [else (safe-assert (ax-register? src1 %rcx)) (emit asl dest code*)])))) (define asm-srl (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (record-case src1 [(imm literal) stuff (emit lsri src1 dest code*)] [else (safe-assert (ax-register? src1 %rcx)) (emit lsr dest code*)])))) (define asm-sra (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (record-case src1 [(imm literal) stuff (emit asri src1 dest code*)] [else (safe-assert (ax-register? src1 %rcx)) (emit asr dest code*)])))) (define asm-logand (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (record-case src1 [(imm literal) stuff (emit andi src1 dest code*)] [else (emit and src1 dest code*)])))) (define asm-logor (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (record-case src1 [(imm literal) stuff (emit ori src1 dest code*)] [else (emit or src1 dest code*)])))) (define asm-logxor (lambda (code* dest src0 src1) (Trivit (dest src1) (safe-assert (equal? (Triv->rand src0) dest)) (record-case src1 [(imm literal) stuff (emit xori src1 dest code*)] [else (emit xor src1 dest code*)])))) (define asm-lognot (lambda (code* dest src) (Trivit (dest) (safe-assert (equal? (Triv->rand src) dest)) (emit not dest code*)))) (define asm-lea1 (lambda (offset) (rec asm-lea1-internal (lambda (code* dest src) (if (eq? src dest) (Trivit (dest) (emit addi `(imm ,offset) dest code*)) (Trivit (dest) (emit lea `(disp ,offset ,src) dest code*))))))) (define asm-lea2 (lambda (offset) (rec asm-lea2-internal (lambda (code* dest src1 src2) (cond [(and (eq? src1 dest) (fx= offset 0)) (Trivit (dest src2) (emit add src2 dest code*))] [(and (eq? src2 dest) (fx= offset 0)) (Trivit (dest src1) (emit add src1 dest code*))] [else (Trivit (dest) (emit lea `(index ,offset ,src1 ,src2) dest code*))]))))) (define asm-logtest (lambda (i? info) (lambda (l1 l2 offset x y) (Trivit (x y) (safe-assert (record-case x [(disp reg index literal@) stuff #t] [else #f])) (values (record-case y [(imm) (n) (if (and (fixnum? n) (fx= (fxlogand n #xff) n) (record-case x [(reg) r #t] ; counting on little-endian byte order [(disp index) stuff #t])) (emit testib y x '()) (emit testi y x '()))] [(literal) stuff (emit testi y x '())] [else (emit test x y '())]) (let-values ([(l1 l2) (if i? (values l2 l1) (values l1 l2))]) (asm-conditional-jump info l2 l1 offset))))))) (define asm-fl-relop (lambda (info) (lambda (l1 l2 offset x y) (values (let ([x `(disp ,(constant flonum-data-disp) ,x)] [y `(disp ,(constant flonum-data-disp) ,y)]) (emit sse.movsd y (cons 'reg %flreg1) (emit sse.ucomisd x (cons 'reg %flreg1) '()))) (asm-conditional-jump info l1 l2 offset))))) (define asm-relop (lambda (info) (rec asm-relop-internal (lambda (l1 l2 offset x y) (Trivit (x y) (safe-assert (record-case x [(reg disp index) ignore #t] [else #f])) (values (record-case y [(imm literal) stuff (emit cmpi y x '())] [else (emit cmp y x '())]) (asm-conditional-jump info l1 l2 offset))))))) (define asm-condition-code (lambda (info) (rec asm-check-flag-internal (lambda (l1 l2 offset) (values '() (asm-conditional-jump info l1 l2 offset)))))) ; TODO: should this also handle pushil? (define asm-push (lambda (code* x) (Trivit (x) (emit push x code*)))) (define asm-save-flrv (lambda (code*) ; save float return value in case we're calling into C ; from the foreign-procedure return path to allocate a flonum ; return value. need 16 bytes rather than 8 to maintain ; 16-byte stack alignment (emit subi '(imm 16) (cons 'reg %sp) (emit sse.movsd (cons 'reg %Cfpretval) `(disp 0 ,%sp) code*)))) (define asm-restore-flrv (lambda (code*) (emit sse.movsd `(disp 0 ,%sp) (cons 'reg %Cfpretval) (emit addi '(imm 16) (cons 'reg %sp) code*)))) (define asm-library-jump (lambda (l) (asm-helper-jump '() `(x86_64-jump ,(constant code-data-disp) (library-code ,(libspec-label-libspec l)))))) (define asm-library-call (lambda (libspec) (let ([target `(x86_64-call ,(constant code-data-disp) (library-code ,libspec))]) (rec asm-asm-call-internal (lambda (code* jmp-reg . ignore) (asm-helper-call code* target jmp-reg)))))) (define asm-c-simple-call (lambda (entry) (rec asm-c-simple-call-internal (lambda (code* jmp-reg) (let ([sp-opnd (cons 'reg %sp)]) (if (fx= entry (lookup-c-entry Sreturn)) ; pretend S_generic_invoke called Sreturn directly by wiping out ; pad added by invoke-prelude and jumping rather than calling (emit addi '(imm 8) sp-opnd (asm-helper-jump code* `(x86_64-jump 0 (entry ,entry)))) (let ([target `(x86_64-call 0 (entry ,entry))]) (if-feature windows ; must leave room for callee to store argument registers, ; even if there are no arguments (emit subi '(imm 32) sp-opnd (asm-helper-call (emit addi '(imm 32) sp-opnd code*) target jmp-reg)) (asm-helper-call code* target jmp-reg))))))))) (define asm-get-tc (let ([target `(x86_64-call 0 (entry ,(lookup-c-entry get-thread-context)))]) (lambda (code* jmp-reg) ; dest is ignored, since it is always the first C argument (rax in this case) (asm-helper-call code* target jmp-reg)))) (define asm-activate-thread (let ([target `(x86_64-call 0 (entry ,(lookup-c-entry activate-thread)))]) (lambda (code* jmp-reg) (asm-helper-call code* target jmp-reg)))) (define asm-deactivate-thread (let ([target `(x86_64-call 0 (entry ,(lookup-c-entry deactivate-thread)))]) (lambda (code*) (asm-helper-call code* target %rax)))) (define asm-unactivate-thread (let ([target `(x86_64-call 0 (entry ,(lookup-c-entry unactivate-thread)))]) (lambda (code* arg-reg) (asm-helper-call code* target %rax)))) (define asm-indirect-call (lambda (code* t . ignore) ; NB: c-call is already required to be a register or memory operand, so ; no need to build a relocation entry. (Trivit (t) (emit call t code*)))) (define asm-direct-jump (lambda (l offset) (asm-helper-jump '() (make-funcrel 'x86_64-jump l offset)))) (define asm-literal-jump (lambda (info) (asm-helper-jump '() `(x86_64-jump ,(info-literal-offset info) (,(info-literal-type info) ,(info-literal-addr info)))))) (define asm-indirect-jump (lambda (t) (Trivit (t) (safe-assert (record-case t [(imm) (n) (signed-32? n)] [else #t])) (emit jmp t '())))) (define-who asm-return-address (lambda (dest l incr-offset next-addr) (make-rachunk dest l incr-offset next-addr (or (cond [(local-label-offset l) => (lambda (offset) (let ([disp (fx- next-addr (fx- offset incr-offset))]) (and (signed-32? disp) (Trivit (dest) (emit lea `(riprel ,disp) dest '())))))] [else #f]) (asm-move '() dest (with-output-language (L16 Triv) `(label-ref ,l ,incr-offset))))))) (define asm-jump (lambda (l next-addr) (make-gchunk l next-addr (cond [(local-label-offset l) => (lambda (offset) (let ([disp (fx- next-addr offset)]) (safe-assert (signed-32? disp)) (if (fx= disp 0) '() (emit bra `(label ,disp ,l) '()))))] [else ; label must be somewhere above. generate something so that a hard loop ; doesn't get dropped. this also has some chance of being the right size ; for the final branch instruction. (emit bra `(label 0 ,l) '())])))) (define-who asm-conditional-jump (lambda (info l1 l2 next-addr) (define get-disp-opnd (lambda (next-addr l) (cond [(and (local-label? l) (local-label-offset l)) => (lambda (offset) (let ([disp (fx- next-addr offset)]) (safe-assert (signed-32? disp)) (values disp `(label ,disp ,l))))] [else (values 0 `(label 0 ,l))]))) (safe-assert (and (local-label? l1) (local-label? l2))) (let ([type (info-condition-code-type info)] [reversed? (info-condition-code-reversed? info)]) (make-cgchunk info l1 l2 next-addr (let () (define-syntax pred-case (lambda (x) (define build-bop-seq (lambda (bop opnd1 opnd2 l2 body) #`(let ([code* (emit #,bop #,opnd1 code*)]) (let-values ([(disp #,opnd2) (get-disp-opnd (fx+ next-addr (asm-size* code*)) #,l2)]) #,body)))) (define handle-or (lambda (e opnd l) (syntax-case e (or) [(or bop1 bop2) (build-bop-seq #'bop2 opnd opnd l #`(emit bop1 #,opnd code*))] [bop #`(emit bop #,opnd code*)]))) (define handle-reverse (lambda (e opnd l) (syntax-case e (r?) [(r? c1 c2) #`(if reversed? #,(handle-or #'c1 opnd l) #,(handle-or #'c2 opnd l))] [_ (handle-or e opnd l)]))) (define handle-inverse (lambda (e) (syntax-case e (i?) [(i? c1 c2) #`(cond [(fx= disp1 0) #,(handle-reverse #'c1 #'opnd2 #'l2)] [(fx= disp2 0) #,(handle-reverse #'c2 #'opnd1 #'l1)] [else #,(build-bop-seq #'bra #'opnd2 #'opnd1 #'l1 (handle-reverse #'c2 #'opnd1 #'l1))])] [_ #`(cond ; treating e as c1: inverted condition, branching to false label [(fx= disp1 0) #,(handle-reverse e #'opnd2 #'l2)] [else #,(build-bop-seq #'bra #'opnd1 #'opnd2 #'l2 (handle-reverse e #'opnd2 #'l2))])]))) (syntax-case x () [(_ [(pred ...) cl-body] ...) (with-syntax ([(cl-body ...) (map handle-inverse #'(cl-body ...))]) #'(let ([code* '()]) (let-values ([(disp1 opnd1) (get-disp-opnd next-addr l1)] [(disp2 opnd2) (get-disp-opnd next-addr l2)]) (case type [(pred ...) cl-body] ... [else ($oops who "~s branch type is currently unsupported" type)]))))]))) (pred-case [(eq?) (i? bne beq)] [(u<) (i? (r? bls bcc) (r? bhi bcs))] [(<) (i? (r? ble bge) (r? bgt blt))] [(<=) (i? (r? blt bgt) (r? bge ble))] [(>) (i? (r? bge ble) (r? blt bgt))] [(>=) (i? (r? bgt blt) (r? ble bge))] [(overflow multiply-overflow) (i? bvc bvs)] [(carry) (i? bcc bcs)] ; unordered: zf,pf,cf <- 111; gt: 000; lt: 001; eq: 100 ; reversed & inverted: !(fl< y x) = !(fl> x y) iff zf = 1 & cf = 1 [(fl<) bls] ; reversed & inverted: !(fl<= y x) = !(fl>= x y) iff cf = 1 [(fl<=) bcs] ; inverted: !(fl= x y) iff zf = 0 or cf (or pf) = 1 [(fl=) (or bne bcs)])))))) (define asm-data-label (lambda (code* l offset func code-size) (let ([rel (make-funcrel 'abs l offset)]) (cons* rel (aop-cons* `(asm "mrv point:" ,rel) code*))))) (define asm-helper-jump (lambda (code* reloc) (let ([jmp-reg (cons 'reg %ts)]) (emit movi '(imm #xffffffff) jmp-reg (emit jmp jmp-reg (asm-helper-relocation code* reloc)))))) (define asm-kill (lambda (code* dest) code*)) (define asm-helper-call (lambda (code* reloc jmp-reg) (emit movi '(imm #xffffffff) (cons 'reg jmp-reg) (emit call (cons 'reg jmp-reg) (asm-helper-relocation code* reloc))))) (define asm-helper-relocation (lambda (code* reloc) (cons* reloc (aop-cons* `(asm "relocation:" ,reloc) code*)))) (define asm-rp-header (let ([mrv-error `(abs ,(constant code-data-disp) (library-code ,(lookup-libspec values-error)))]) (lambda (code* mrvl fs lpm func code-size) (let ([size (constant-case ptr-bits [(32) 'long] [(64) 'quad])]) (cons* (if (target-fixnum? lpm) `(,size . ,(fix lpm)) `(abs 0 (object ,lpm))) (aop-cons* `(asm livemask: ,(format "~b" lpm)) '(code-top-link) (aop-cons* `(asm code-top-link) `(,size . ,fs) (aop-cons* `(asm "frame size:" ,fs) (if mrvl (asm-data-label code* mrvl 0 func code-size) (cons* mrv-error (aop-cons* `(asm "mrv point:" ,mrv-error) code*))))))))))) (define-syntax asm-enter (lambda (x) (syntax-case x () [(k e) (with-implicit (k %seq %inline) #'(%seq ; adjust to 16-byte boundary, accounting for 8-byte return address pushed by call (set! ,%sp ,(%inline - ,%sp (immediate 8))) ,e))]))) (module (asm-foreign-call asm-foreign-callable) (if-feature windows (begin (define make-vint (lambda () (vector %Carg1 %Carg2 %Carg3 %Carg4))) (define make-vfp (lambda () (vector %Cfparg1 %Cfparg2 %Cfparg3 %Cfparg4)))) (begin (define make-vint (lambda () (vector %Carg1 %Carg2 %Carg3 %Carg4 %Carg5 %Carg6))) (define make-vfp (lambda () (vector %Cfparg1 %Cfparg2 %Cfparg3 %Cfparg4 %Cfparg5 %Cfparg6 %Cfparg7 %Cfparg8))))) (define (align n size) (fxlogand (fx+ n (fx- size 1)) (fx- size))) (define (classify-type type) (nanopass-case (Ltype Type) type [(fp-ftd& ,ftd) (classify-eightbytes ftd)] [else #f])) (define (classified-size type) (nanopass-case (Ltype Type) type [(fp-ftd& ,ftd) ($ftd-size ftd)] [else #f])) ;; classify-eightbytes: returns '(memory) or a nonemtpy list of 'integer/'sse (if-feature windows ;; Windows: either passed in one register or not (define (classify-eightbytes ftd) (cond [($ftd-compound? ftd) (if (memv ($ftd-size ftd) '(1 2 4 8)) '(integer) '(memory))] [(eq? 'float (caar ($ftd->members ftd))) '(sse)] [else '(integer)])) ;; Non-Windows: SYSV ABI is a more general classification of ;; 8-byte segments into 'integer, 'sse, or 'memory modes (define (classify-eightbytes ftd) (define (merge t1 t2) (cond [(eq? t1 t2) t1] [(eq? t1 'no-class) t2] [(eq? t2 'no-class) t1] [(eq? t1 'memory) 'memory] [(eq? t2 'memory) 'memory] [else 'integer])) (cond [(or (> ($ftd-size ftd) 16) ; more than 2 eightbytes => passed in memory (fx= 0 ($ftd-size ftd))) '(memory)] [else (let ([classes (make-vector (fxsrl (align ($ftd-size ftd) 8) 3) 'no-class)]) (let loop ([mbrs ($ftd->members ftd)]) (cond [(null? mbrs) (vector->list classes)] [else (let ([kind (caar mbrs)] [size (cadar mbrs)] [offset (caddar mbrs)]) (cond [(not (fx= offset (align offset size))) ;; misaligned '(memory)] [else (let* ([pos (fxsrl offset 3)] [class (vector-ref classes pos)] [new-class (merge class (if (eq? kind 'float) 'sse 'integer))]) (cond [(eq? new-class 'memory) '(memory)] [else (vector-set! classes pos new-class) (loop (cdr mbrs))]))]))])))]))) (define (count v l) (cond [(null? l) 0] [(eq? (car l) v) (fx+ 1 (count v (cdr l)))] [else (count v (cdr l))])) ;; A result is put in registers if it has up to two ;; eightbytes, each 'integer or 'sse. On Windows, ;; `result-classes` always has only one item. (define (result-fits-in-registers? result-classes) (and result-classes (not (eq? 'memory (car result-classes))) (or (null? (cdr result-classes)) (null? (cddr result-classes))))) ;; An argument is put in registers depending on how many ;; registers are left (define (pass-here-by-stack? classes iint ints ifp fps) (or (eq? 'memory (car classes)) (fx> (fx+ iint ints) 6) (fx> (fx+ ifp fps) 8))) (module (push-registers pop-registers push-registers-size) (define (move-registers regs load?) (define vfp (make-vfp)) (define (fp-reg? reg) (let loop ([i (fx- (vector-length vfp) 1)]) (or (eq? reg (vector-ref vfp i)) (and (fx> i 0) (loop (fx- i 1)))))) (with-output-language (L13 Effect) (let loop ([regs regs] [offset 0]) (let* ([reg (car regs)] [e (cond [(fp-reg? reg) `(inline ,(make-info-loadfl reg) ,(if load? %load-double %store-double) ,%sp ,%zero (immediate ,offset))] [load? `(set! ,reg ,(%mref ,%sp ,offset))] [else `(set! ,(%mref ,%sp ,offset) ,reg)])] [regs (cdr regs)]) (if (null? regs) e `(seq ,e ,(loop regs (fx+ offset 8)))))))) (define (push-registers-size regs) (align (fx* 8 (length regs)) 16)) (define (push-registers regs) (with-output-language (L13 Effect) (%seq (set! ,%sp ,(%inline - ,%sp (immediate ,(push-registers-size regs)))) ,(move-registers regs #f)))) (define (pop-registers regs) (with-output-language (L13 Effect) (%seq ,(move-registers regs #t) (set! ,%sp ,(%inline + ,%sp (immediate ,(push-registers-size regs)))))))) (define (as-c-call e) (if-feature windows (with-output-language (L13 Effect) (%seq (set! ,%sp ,(%inline - ,%sp (immediate 32))) ,e (set! ,%sp ,(%inline + ,%sp (immediate 32))))) e)) (define asm-foreign-call (with-output-language (L13 Effect) (letrec ([load-double-stack (lambda (offset) (lambda (x) ; requires var (%seq (inline ,(make-info-loadfl %flreg1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) (inline ,(make-info-loadfl %flreg1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] [load-single-stack (lambda (offset) (lambda (x) ; requires var (%seq (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) (inline ,(make-info-loadfl %flreg1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] [load-int-stack (lambda (offset) (lambda (rhs) ; requires rhs `(set! ,(%mref ,%sp ,offset) ,rhs)))] [load-double-reg (lambda (fpreg) (lambda (x) ; requires var `(inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))))] [load-double-reg2 (lambda (fpreg ireg) (lambda (x) ; requires var (%seq (inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) (set! ,ireg (inline ,(make-info-loadfl fpreg) ,%get-double)))))] [load-single-reg (lambda (fpreg) (lambda (x) ; requires var `(inline ,(make-info-loadfl fpreg) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))))] [load-int-reg (lambda (type ireg) (lambda (x) (cond [(nanopass-case (Ltype Type) type [(fp-integer ,bits) (fx= bits 32)] [else #f]) ; original code generated movil or movl (nanopass-case (L13 Rhs) x [,t `(set! ,ireg ,(%inline zext32 ,t))] ; here we use ireg twice to get around needing a temporary when ; x is a non-triv right-hand-side [else (%seq (set! ,ireg ,x) (set! ,ireg ,(%inline zext32 ,ireg)))])] [else `(set! ,ireg ,x)])))] [load-content-stack (lambda (offset len) (lambda (x) ; requires var (let loop ([offset offset] [x-offset 0] [len len]) (cond [(= len 0) `(nop)] [(>= len 8) `(seq (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-64 #f) ,%load ,x ,%zero (immediate ,x-offset))) ,(loop (fx+ offset 8) (fx+ x-offset 8) (fx- len 8)))] [(>= len 4) `(seq (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-32 #f) ,%load ,x ,%zero (immediate ,x-offset))) ,(loop (fx+ offset 4) (fx+ x-offset 4) (fx- len 4)))] [(>= len 2) `(seq (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,x-offset))) ,(loop (fx+ offset 2) (fx+ x-offset 2) (fx- len 2)))] [else `(set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,x-offset)))]))))] [load-content-regs (lambda (classes size unsigned? iint ifp vint vfp) (lambda (x) ; requires var (let loop ([size size] [iint iint] [ifp ifp] [classes classes] [x-offset 0]) (cond [(null? classes) `(nop)] [(eq? 'sse (car classes)) (cond [(fx= size 4) ;; Must be the last element `(inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%load-single ,x ,%zero (immediate ,x-offset))] [else `(seq (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%load-double ,x ,%zero (immediate ,x-offset)) ,(loop (fx- size 8) iint (fx+ ifp 1) (cdr classes) (fx+ x-offset 8)))])] ;; Remaining cases are integers: [(>= size 8) `(seq (set! ,(vector-ref vint iint) (inline ,(make-info-load 'integer-64 #f) ,%load ,x ,%zero (immediate ,x-offset))) ,(loop (fx- size 8) (fx+ iint 1) ifp (cdr classes) (fx+ x-offset 8)))] ;; Remaining cases must be the last element [else (let loop ([reg (vector-ref vint iint)] [size size] [x-offset x-offset]) (cond [(= size 4) `(set! ,reg (inline ,(make-info-load (if unsigned? 'unsigned-32 'integer-32) #f) ,%load ,x ,%zero (immediate ,x-offset)))] [(= size 2) `(set! ,reg (inline ,(make-info-load (if unsigned? 'unsigned-16 'integer-16) #f) ,%load ,x ,%zero (immediate ,x-offset)))] [(= size 1) `(set! ,reg (inline ,(make-info-load (if unsigned? 'unsigned-8 'integer-8) #f) ,%load ,x ,%zero (immediate ,x-offset)))] [(> size 4) ;; 5, 6, or 7: multiple steps to avoid reading too many bytes (let ([tmp %rax]) ;; ?? ok to use %rax? (%seq ,(loop reg (fx- size 4) (fx+ x-offset 4)) (set! ,reg ,(%inline sll ,reg (immediate 32))) ,(loop tmp 4 x-offset) (set! ,reg ,(%inline + ,reg ,tmp))))] [else ;; 3: multiple steps to avoid reading too many bytes (let ([tmp %rax]) ;; ?? ok to use %rax? (%seq ,(loop reg (fx- size 2) (fx+ x-offset 2)) (set! ,reg ,(%inline sll ,reg (immediate 16))) ,(loop tmp 2 x-offset) (set! ,reg ,(%inline + ,reg ,tmp))))]))]))))] [add-regs (lambda (ints ir vr regs) (cond [(fx= 0 ints) regs] [else (add-regs (fx- ints 1) (fx+ ir 1) vr (cons (vector-ref vr ir) regs))]))] [do-args (lambda (types vint vfp) (if-feature windows (let loop ([types types] [locs '()] [regs '()] [fp-regs '()] [i 0] [isp 0]) (if (null? types) (values isp 0 locs regs fp-regs) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (< i 4) (let ([reg (vector-ref vint i)]) (loop (cdr types) (cons (load-double-reg2 (vector-ref vfp i) reg) locs) (cons reg regs) (cons (vector-ref vfp i) fp-regs) (fx+ i 1) isp)) (loop (cdr types) (cons (load-double-stack isp) locs) regs fp-regs i (fx+ isp 8)))] [(fp-single-float) (if (< i 4) (loop (cdr types) (cons (load-single-reg (vector-ref vfp i)) locs) regs (cons (vector-ref vfp i) fp-regs) (fx+ i 1) isp) (loop (cdr types) (cons (load-single-stack isp) locs) regs fp-regs i (fx+ isp 8)))] [(fp-ftd& ,ftd) (cond [(memv ($ftd-size ftd) '(1 2 4 8)) ;; pass as value in register or as value on the stack (cond [(< i 4) ;; pass as value in register (cond [(and (not ($ftd-compound? ftd)) (eq? 'float (caar ($ftd->members ftd)))) ;; float or double (loop (cdr types) (cons (load-content-regs '(sse) ($ftd-size ftd) #t i i vint vfp) locs) (add-regs 1 i vint regs) (add-regs 1 i vfp fp-regs) (fx+ i 1) isp)] [else ;; integer (loop (cdr types) (cons (load-content-regs '(integer) ($ftd-size ftd) ($ftd-unsigned? ftd) i i vint vfp) locs) (add-regs 1 i vint regs) fp-regs(fx+ i 1) isp)])] [else ;; pass as value on the stack (loop (cdr types) (cons (load-content-stack isp ($ftd-size ftd)) locs) regs fp-regs i (fx+ isp (align ($ftd-size ftd) 8)))])] [else ;; pass by reference in register or by reference on the stack (cond [(< i 4) ;; pass by reference in a register (let ([reg (vector-ref vint i)]) (loop (cdr types) (cons (load-int-reg (car types) reg) locs) (cons reg regs) fp-regs (fx+ i 1) isp))] [else ;; pass by reference on the stack (loop (cdr types) (cons (load-int-stack isp) locs) regs fp-regs i (fx+ isp 8))])])] [else (if (< i 4) (let ([reg (vector-ref vint i)]) (loop (cdr types) (cons (load-int-reg (car types) reg) locs) (cons reg regs) fp-regs (fx+ i 1) isp)) (loop (cdr types) (cons (load-int-stack isp) locs) regs fp-regs i (fx+ isp 8)))]))) (let loop ([types types] [locs '()] [regs '()] [fp-regs '()] [iint 0] [ifp 0] [isp 0]) (if (null? types) (values isp ifp locs regs fp-regs) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (< ifp 8) (loop (cdr types) (cons (load-double-reg (vector-ref vfp ifp)) locs) regs (cons (vector-ref vfp ifp) fp-regs) iint (fx+ ifp 1) isp) (loop (cdr types) (cons (load-double-stack isp) locs) regs fp-regs iint ifp (fx+ isp 8)))] [(fp-single-float) (if (< ifp 8) (loop (cdr types) (cons (load-single-reg (vector-ref vfp ifp)) locs) regs (cons (vector-ref vfp ifp) fp-regs) iint (fx+ ifp 1) isp) (loop (cdr types) (cons (load-single-stack isp) locs) regs fp-regs iint ifp (fx+ isp 8)))] [(fp-ftd& ,ftd) (let* ([classes (classify-eightbytes ftd)] [ints (count 'integer classes)] [fps (count 'sse classes)]) (cond [(pass-here-by-stack? classes iint ints ifp fps) ;; pass on the stack (loop (cdr types) (cons (load-content-stack isp ($ftd-size ftd)) locs) regs fp-regs iint ifp (fx+ isp (align ($ftd-size ftd) 8)))] [else ;; pass in registers (loop (cdr types) (cons (load-content-regs classes ($ftd-size ftd) ($ftd-unsigned? ftd) iint ifp vint vfp) locs) (add-regs ints iint vint regs) (add-regs fps ifp vfp fp-regs) (fx+ iint ints) (fx+ ifp fps) isp)]))] [else (if (< iint 6) (let ([reg (vector-ref vint iint)]) (loop (cdr types) (cons (load-int-reg (car types) reg) locs) (cons reg regs) fp-regs (fx+ iint 1) ifp isp)) (loop (cdr types) (cons (load-int-stack isp) locs) regs fp-regs iint ifp (fx+ isp 8)))])))))]) (define (add-deactivate adjust-active? t0 live* result-live* e) (cond [adjust-active? (let ([save-and-restore (lambda (regs e) (cond [(null? regs) e] [else (%seq ,(push-registers regs) ,e ,(pop-registers regs))]))]) (%seq (set! ,%deact ,t0) ,(save-and-restore (cons %deact live*) (as-c-call (%inline deactivate-thread))) ,e ,(save-and-restore result-live* (as-c-call `(set! ,%rax ,(%inline activate-thread))))))] [else e])) (define (add-save-fill-target fill-result-here? frame-size locs) (cond [fill-result-here? ;; The callee isn't expecting a pointer to fill with the result. ;; Stash the pointer as an extra argument, and then when the ;; function returns, we'll move register content for the result ;; into the pointer's target (values (fx+ frame-size (constant ptr-bytes)) (append locs (list (lambda (x) ; requires var `(set! ,(%mref ,%sp ,frame-size) ,x)))))] [else (values frame-size locs)])) (define (add-fill-result c-call saved-offset classes size) (let loop ([classes classes] [offset 0] [iregs (reg-list %rax %rdx)] [fpregs (reg-list %Cfparg1 %Cfparg2)] [size size]) (cond [(null? classes) `(seq ,c-call (set! ,%rcx ,(%mref ,%sp ,saved-offset)))] [(eq? 'sse (car classes)) `(seq ,(loop (cdr classes) (fx+ offset 8) iregs (cdr fpregs) (fx- size 8)) ,(case size [(4) `(inline ,(make-info-loadfl (car fpregs)) ,%store-single ,%rcx ,%zero (immediate ,offset))] [else `(inline ,(make-info-loadfl (car fpregs)) ,%store-double ,%rcx ,%zero (immediate ,offset))]))] [else `(seq ,(loop (cdr classes) (fx+ offset 8) (cdr iregs) fpregs (fx- size 8)) ,(let ([ireg (car iregs)]) (case size [(1) `(inline ,(make-info-load 'integer-8 #f) ,%store ,%rcx ,%zero (immediate ,offset) ,ireg)] [(2) `(inline ,(make-info-load 'integer-16 #f) ,%store ,%rcx ,%zero (immediate ,offset) ,ireg)] [(3) (%seq (inline ,(make-info-load 'integer-16 #f) ,%store ,%rcx ,%zero (immediate ,offset) ,ireg) (set! ,ireg ,(%inline srl ,ireg (immediate 16))) (inline ,(make-info-load 'integer-8 #f) ,%store ,%rcx ,%zero (immediate ,(fx+ 2 offset)) ,ireg))] [(4) `(inline ,(make-info-load 'integer-32 #f) ,%store ,%rcx ,%zero (immediate ,offset) ,ireg)] [(5 6 7) (%seq (inline ,(make-info-load 'integer-32 #f) ,%store ,%rcx ,%zero (immediate ,offset) ,ireg) (set! ,ireg ,(%inline srl ,ireg (immediate 32))) ,(case size [(5) `(inline ,(make-info-load 'integer-8 #f) ,%store ,%rcx ,%zero (immediate ,(fx+ 4 offset)) ,ireg)] [(6) `(inline ,(make-info-load 'integer-16 #f) ,%store ,%rcx ,%zero (immediate ,(fx+ 4 offset)) ,ireg)] [(7) (%seq (inline ,(make-info-load 'integer-16 #f) ,%store ,%rcx ,%zero (immediate ,(fx+ 4 offset)) ,ireg) (set! ,ireg ,(%inline srl ,ireg (immediate 16))) (inline ,(make-info-load 'integer-8 #f) ,%store ,%rcx ,%zero (immediate ,(fx+ 6 offset)) ,ireg))]))] [else `(set! ,(%mref ,%rcx ,offset) ,ireg)])))]))) (define (get-result-regs fill-result-here? result-type result-classes) (if fill-result-here? (let loop ([classes result-classes] [iregs (reg-list %rax %rdx)] [fpregs (reg-list %Cfparg1 %Cfparg2)]) (cond [(null? classes) '()] [(eq? 'sse (car classes)) (cons (car fpregs) (loop (cdr classes) iregs (cdr fpregs)))] [else (cons (car iregs) (loop (cdr classes) (cdr iregs) fpregs))])) (nanopass-case (Ltype Type) result-type [(fp-double-float) (list %Cfpretval)] [(fp-single-float) (list %Cfpretval)] [(fp-void) '()] [else (list %rax)]))) (define returnem (lambda (frame-size locs ccall r-loc) ; need to maintain 16-byte alignment, ignoring the return address ; pushed by call instruction, which counts as part of callee's frame ; tc is callee-save; no need to save (let ([frame-size (logand (+ frame-size 15) -16)]) (values (lambda () (if (fx= frame-size 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate ,frame-size))))) (reverse locs) ccall r-loc (lambda () (if (fx= frame-size 0) `(nop) `(set! ,%sp ,(%inline + ,%sp (immediate ,frame-size))))))))) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore (let* ([conv* (info-foreign-conv* info)] [arg-type* (info-foreign-arg-type* info)] [result-type (info-foreign-result-type info)] [result-classes (classify-type result-type)] [result-size (classified-size result-type)] [fill-result-here? (result-fits-in-registers? result-classes)] [adjust-active? (if-feature pthreads (memq 'adjust-active conv*) #f)]) (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*) (make-vint) (make-vfp)) (lambda (frame-size nfp locs live* fp-live*) (with-values (add-save-fill-target fill-result-here? frame-size locs) (lambda (frame-size locs) (returnem frame-size locs (lambda (t0) (let* ([t (if adjust-active? %deact t0)] ; need a register if `adjust-active?` [c-call (add-deactivate adjust-active? t0 (append fp-live* live*) (get-result-regs fill-result-here? result-type result-classes) (if-feature windows (%seq (set! ,%sp ,(%inline - ,%sp (immediate 32))) (inline ,(make-info-kill*-live* (reg-list %rax %rdx) live*) ,%c-call ,t) (set! ,%sp ,(%inline + ,%sp (immediate 32)))) (%seq ;; System V ABI varargs functions require count of fp regs used in %al register. ;; since we don't know if the callee is a varargs function, we always set it. (set! ,%rax (immediate ,nfp)) (inline ,(make-info-kill*-live* (reg-list %rax %rdx) (cons %rax live*)) ,%c-call ,t))))]) (cond [fill-result-here? (add-fill-result c-call (fx- frame-size (constant ptr-bytes)) result-classes result-size)] [else c-call]))) (nanopass-case (Ltype Type) result-type [(fp-double-float) (lambda (lvalue) `(inline ,(make-info-loadfl %Cfpretval) ,%store-double ,lvalue ,%zero ,(%constant flonum-data-disp)))] [(fp-single-float) (lambda (lvalue) `(inline ,(make-info-loadfl %Cfpretval) ,%store-single->double ,lvalue ,%zero ,(%constant flonum-data-disp)))] [(fp-integer ,bits) (case bits [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline sext8 ,%rax)))] [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline sext16 ,%rax)))] [(32) (lambda (lvalue) `(set! ,lvalue ,(%inline sext32 ,%rax)))] [(64) (lambda (lvalue) `(set! ,lvalue ,%rax))] [else ($oops 'assembler-internal "unexpected asm-foreign-procedures fp-integer size ~s" bits)])] [(fp-unsigned ,bits) (case bits [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline zext8 ,%rax)))] [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline zext16 ,%rax)))] [(32) (lambda (lvalue) `(set! ,lvalue ,(%inline zext32 ,%rax)))] [(64) (lambda (lvalue) `(set! ,lvalue ,%rax))] [else ($oops 'assembler-internal "unexpected asm-foreign-procedures fp-unsigned size ~s" bits)])] [else (lambda (lvalue) `(set! ,lvalue ,%rax))]))))))))))) (define asm-foreign-callable #| Windows: Frame Layout +---------------------------+ | | | incoming stack args | | | +---------------------------+ <- 16-byte boundary | | | space for register args | four quads sp+80/96: | | +---------------------------+ <- 16-byte boundary | incoming return address | one quad incoming sp: +---------------------------+ sp+72: | active state | zero or two quads +---------------------------+ | | | callee-save registers | RBX, RBP, RDI, RSI, R12, R13, R14, R15 (8 quads) | | +---------------------------+ | pad word / indirect space | one quad sp+0: +---------------------------+<- 16-byte boundary Standard: Frame Layout +---------------------------+ | | | incoming stack args | sp+192: | | +---------------------------+ <- 16-byte boundary | incoming return address | one quad +---------------------------+ sp+176: | pad word / active state | one quad +---------------------------+ | indirect result space | two quads sp+160: | (for & results via regs) | +---------------------------+<- 16-byte boundary | | | saved register args | space for Carg*, Cfparg* (14 quads) sp+48: | | +---------------------------+<- 16-byte boundary | | | callee-save registers | RBX, RBP, R12, R13, R14, R15 (6 quads) | | sp+0: +---------------------------+<- 16-byte boundary |# (with-output-language (L13 Effect) (let () (define load-double-stack (lambda (offset) (lambda (x) ; requires var (%seq (inline ,(make-info-loadfl %flreg1) ,%load-double ,%sp ,%zero (immediate ,offset)) (inline ,(make-info-loadfl %flreg1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) (define load-single-stack (lambda (offset) (lambda (x) ; requires var (%seq (inline ,(make-info-loadfl %flreg1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) (inline ,(make-info-loadfl %flreg1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) (define load-int-stack (lambda (type offset) (lambda (lvalue) (nanopass-case (Ltype Type) type [(fp-integer ,bits) (case bits [(8) `(set! ,lvalue (inline ,(make-info-load 'integer-8 #f) ,%load ,%sp ,%zero (immediate ,offset)))] [(16) `(set! ,lvalue (inline ,(make-info-load 'integer-16 #f) ,%load ,%sp ,%zero (immediate ,offset)))] [(32) `(set! ,lvalue (inline ,(make-info-load 'integer-32 #f) ,%load ,%sp ,%zero (immediate ,offset)))] [(64) `(set! ,lvalue ,(%mref ,%sp ,offset))] [else ($oops 'assembler-internal "unexpected load-int-stack fp-integer size ~s" bits)])] [(fp-unsigned ,bits) (case bits [(8) `(set! ,lvalue (inline ,(make-info-load 'unsigned-8 #f) ,%load ,%sp ,%zero (immediate ,offset)))] [(16) `(set! ,lvalue (inline ,(make-info-load 'unsigned-16 #f) ,%load ,%sp ,%zero (immediate ,offset)))] [(32) `(set! ,lvalue (inline ,(make-info-load 'unsigned-32 #f) ,%load ,%sp ,%zero (immediate ,offset)))] [(64) `(set! ,lvalue ,(%mref ,%sp ,offset))] [else ($oops 'assembler-internal "unexpected load-int-stack fp-unsigned size ~s" bits)])] [else `(set! ,lvalue ,(%mref ,%sp ,offset))])))) (define load-stack-address (lambda (offset) (lambda (lvalue) ; requires lvalue `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) (define save-arg-regs (lambda (types) (define vint (make-vint)) (define vfp (make-vfp)) (if-feature windows (let f ([types types] [i 0] [isp 8]) (if (or (null? types) (fx= i 4)) `(nop) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (< i 4) (%seq (inline ,(make-info-loadfl (vector-ref vfp i)) ,%store-double ,%sp ,%zero (immediate ,isp)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8))) (f (cdr types) i isp))] [(fp-single-float) (if (< i 4) (%seq (inline ,(make-info-loadfl (vector-ref vfp i)) ,%store-single ,%sp ,%zero (immediate ,isp)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8))) (f (cdr types) i isp))] [(fp-ftd& ,ftd) (cond [(memv ($ftd-size ftd) '(1 2 4 8)) ;; receive as value in register or on the stack (cond [(< i 4) ;; receive in register (cond [(and (not ($ftd-compound? ftd)) (eq? 'float (caar ($ftd->members ftd)))) ;; float or double `(seq (inline ,(make-info-loadfl (vector-ref vfp i)) ,%store-double ,%sp ,%zero (immediate ,isp)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8)))] [else ;; integer `(seq (set! ,(%mref ,%sp ,isp) ,(vector-ref vint i)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8)))])] [else ;; receive by value on the stack (f (cdr types) i isp)])] [else ;; receive by reference in register or on the stack (cond [(< i 4) ;; receive by reference in register `(seq (set! ,(%mref ,%sp ,isp) ,(vector-ref vint i)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8)))] [else ;; receive by reference on the stack (f (cdr types) i isp)])])] [else (if (< i 4) (%seq (set! ,(%mref ,%sp ,isp) ,(vector-ref vint i)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8))) (f (cdr types) i isp))]))) (let f ([types types] [iint 0] [ifp 0] [isp 48]) (if (or (null? types) (and (fx>= iint 6) (fx>= ifp 8))) `(nop) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (< ifp 8) (%seq (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%store-double ,%sp ,%zero (immediate ,isp)) ,(f (cdr types) iint (fx+ ifp 1) (fx+ isp 8))) (f (cdr types) iint ifp isp))] [(fp-single-float) (if (< ifp 8) (%seq (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%store-single ,%sp ,%zero (immediate ,isp)) ,(f (cdr types) iint (fx+ ifp 1) (fx+ isp 8))) (f (cdr types) iint ifp isp))] [(fp-ftd& ,ftd) (let* ([classes (classify-eightbytes ftd)] [ints (count 'integer classes)] [fps (count 'sse classes)]) (cond [(pass-here-by-stack? classes iint ints ifp fps) ;; receive on the stack (f (cdr types) iint ifp isp)] [else ;; receive via registers (let reg-loop ([classes classes] [iint iint] [ifp ifp] [isp isp]) (cond [(null? classes) (f (cdr types) iint ifp isp)] [(eq? (car classes) 'sse) `(seq (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%store-double ,%sp ,%zero (immediate ,isp)) ,(reg-loop (cdr classes) iint (fx+ ifp 1) (+ isp 8)))] [else `(seq (set! ,(%mref ,%sp ,isp) ,(vector-ref vint iint)) ,(reg-loop (cdr classes) (fx+ iint 1) ifp (+ isp 8)))]))]))] [else (if (< iint 6) (%seq (set! ,(%mref ,%sp ,isp) ,(vector-ref vint iint)) ,(f (cdr types) (fx+ iint 1) ifp (fx+ isp 8))) (f (cdr types) iint ifp isp))])))))) (define do-stack (lambda (types adjust-active?) ; risp is where incoming register args are stored ; sisp is where incoming stack args are stored (if-feature windows (let f ([types types] [locs '()] [isp (if adjust-active? 96 80)]) (if (null? types) locs (f (cdr types) (cons (nanopass-case (Ltype Type) (car types) [(fp-double-float) (load-double-stack isp)] [(fp-single-float) (load-single-stack isp)] [(fp-ftd& ,ftd) (cond [(memq ($ftd-size ftd) '(1 2 4 8)) ;; passed by value (load-stack-address isp)] [else ;; passed by reference (load-int-stack (car types) isp)])] [else (load-int-stack (car types) isp)]) locs) (fx+ isp 8)))) (let f ([types types] [locs '()] [iint 0] [ifp 0] [risp 48] [sisp 192]) (if (null? types) locs (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (= ifp 8) (f (cdr types) (cons (load-double-stack sisp) locs) iint ifp risp (fx+ sisp 8)) (f (cdr types) (cons (load-double-stack risp) locs) iint (fx+ ifp 1) (fx+ risp 8) sisp))] [(fp-single-float) (if (= ifp 8) (f (cdr types) (cons (load-single-stack sisp) locs) iint ifp risp (fx+ sisp 8)) (f (cdr types) (cons (load-single-stack risp) locs) iint (fx+ ifp 1) (fx+ risp 8) sisp))] [(fp-ftd& ,ftd) (let* ([classes (classify-eightbytes ftd)] [ints (count 'integer classes)] [fps (count 'sse classes)]) (cond [(pass-here-by-stack? classes iint ints ifp fps) ;; receive on the stack (f (cdr types) (cons (load-stack-address sisp) locs) iint ifp risp (fx+ sisp ($ftd-size ftd)))] [else ;; receive via registers; `save-args-regs` has saved ;; the registers in a suitable order so that the data ;; is contiguous on the stack (f (cdr types) (cons (load-stack-address risp) locs) (fx+ iint ints) (fx+ ifp fps) (fx+ risp (fx* 8 (fx+ ints fps))) sisp)]))] [else (if (= iint 6) (f (cdr types) (cons (load-int-stack (car types) sisp) locs) iint ifp risp (fx+ sisp 8)) (f (cdr types) (cons (load-int-stack (car types) risp) locs) (fx+ iint 1) ifp (fx+ risp 8) sisp))])))))) (define (do-result result-type result-classes adjust-active?) (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) (cond [(result-fits-in-registers? result-classes) ;; Copy content of result area on stack into ;; the integer and floating-point registers (let loop ([result-classes result-classes] [offset (if-feature windows 0 160)] [int* (list %rax %rdx)] [fp* (list %Cfpretval %Cfparg2)] [accum '()] [live* '()] [fp-live* '()]) (cond [(null? result-classes) (values (lambda () (if (pair? (cdr accum)) `(seq ,(car accum) ,(cadr accum)) (car accum))) live* fp-live*)] [(eq? (car result-classes) 'integer) (loop (cdr result-classes) (fx+ offset 8) (cdr int*) fp* (cons `(set! ,(car int*) ,(%mref ,%sp ,offset)) accum) (cons (car int*) live*) fp-live*)] [(eq? (car result-classes) 'sse) (loop (cdr result-classes) (fx+ offset 8) int* (cdr fp*) (cons `(inline ,(make-info-loadfl (car fp*)) ,%load-double ,%sp ,%zero (immediate ,offset)) accum) live* (cons (car fp*) fp-live*))]))] [else (values (lambda () ;; Return pointer that was filled; destination was the first argument `(set! ,%Cretval ,(%mref ,%sp ,(if-feature windows (if adjust-active? 96 80) 48)))) (list %Cretval) '())])] [(fp-double-float) (values (lambda (x) `(inline ,(make-info-loadfl %Cfpretval) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))) '() (list %Cfpretval))] [(fp-single-float) (values (lambda (x) `(inline ,(make-info-loadfl %Cfpretval) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))) '() (list %Cfpretval))] [(fp-void) (values (lambda () `(nop)) '() '())] [else (values (lambda (x) `(set! ,%Cretval ,x)) (list %Cretval) '())])) (define (unactivate result-regs) (let ([e `(seq (set! ,%Carg1 ,(%mref ,%sp ,(+ (push-registers-size result-regs) (if-feature windows 72 176)))) ,(as-c-call (%inline unactivate-thread ,%Carg1)))]) (if (null? result-regs) e (%seq ,(push-registers result-regs) ,e ,(pop-registers result-regs))))) (lambda (info) (let ([conv* (info-foreign-conv* info)] [arg-type* (info-foreign-arg-type* info)] [result-type (info-foreign-result-type info)]) (let* ([result-classes (classify-type result-type)] [adjust-active? (if-feature pthreads (memq 'adjust-active conv*) #f)] [synthesize-first? (and result-classes (result-fits-in-registers? result-classes))] [locs (do-stack (if synthesize-first? (cdr arg-type*) arg-type*) adjust-active?)]) (let-values ([(get-result result-regs result-fp-regs) (do-result result-type result-classes adjust-active?)]) (values (lambda () (%seq ,(if-feature windows (%seq ,(let ([e (save-arg-regs arg-type*)]) (if adjust-active? (%seq ,e (set! ,%sp ,(%inline - ,%sp (immediate 16)))) e)) ,(%inline push ,%rbx) ,(%inline push ,%rbp) ,(%inline push ,%rdi) ,(%inline push ,%rsi) ,(%inline push ,%r12) ,(%inline push ,%r13) ,(%inline push ,%r14) ,(%inline push ,%r15) (set! ,%sp ,(%inline - ,%sp (immediate 8)))) (%seq (set! ,%sp ,(%inline - ,%sp (immediate 136))) ,(%inline push ,%rbx) ,(%inline push ,%rbp) ,(%inline push ,%r12) ,(%inline push ,%r13) ,(%inline push ,%r14) ,(%inline push ,%r15) ,(save-arg-regs arg-type*))) ,(if-feature pthreads ((lambda (e) (if adjust-active? (%seq ,(as-c-call `(set! ,%rax ,(%inline activate-thread))) (set! ,(%mref ,%sp ,(if-feature windows 72 176)) ,%rax) ,e) e)) (%seq (set! ,%rax ,(%inline get-tc)) (set! ,%tc ,%rax))) `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0)))))) (let ([locs (reverse locs)]) (if synthesize-first? (cons (load-stack-address (if-feature windows 0 160)) ; space on stack for results to be returned via registers locs) locs)) get-result (lambda () (define callee-save-regs (if-feature windows (list %rbx %rbp %rdi %rsi %r12 %r13 %r14 %r15) (list %rbx %rbp %r12 %r13 %r14 %r15))) (in-context Tail ((lambda (e) (if adjust-active? (%seq ,(unactivate (append result-fp-regs result-regs)) ,e) e)) (%seq ,(if-feature windows ((lambda (e) (if adjust-active? (%seq ,e (set! ,%sp ,(%inline + ,%sp (immediate 16)))) e)) (%seq (set! ,%sp ,(%inline + ,%sp (immediate 8))) (set! ,%r15 ,(%inline pop)) (set! ,%r14 ,(%inline pop)) (set! ,%r13 ,(%inline pop)) (set! ,%r12 ,(%inline pop)) (set! ,%rsi ,(%inline pop)) (set! ,%rdi ,(%inline pop)) (set! ,%rbp ,(%inline pop)) (set! ,%rbx ,(%inline pop)))) (%seq (set! ,%r15 ,(%inline pop)) (set! ,%r14 ,(%inline pop)) (set! ,%r13 ,(%inline pop)) (set! ,%r12 ,(%inline pop)) (set! ,%rbp ,(%inline pop)) (set! ,%rbx ,(%inline pop)) (set! ,%sp ,(%inline + ,%sp (immediate 136))))) (asm-c-return ,null-info ,callee-save-regs ... ,result-regs ...)))))))))))))) )