Bugzilla – Attachment 216 Details for
Bug 357
Poor fixed-size vectorizable performance with MSVC 2010
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This bugzilla service is closed. All entries have been migrated to
https://gitlab.com/libeigen/eigen
GCC Assembler output for test case.
TestEigen.GCC.ASM (text/plain), 9.84 KB, created by
Colm
on 2011-10-04 20:48:33 UTC
(
hide
)
Description:
GCC Assembler output for test case.
Filename:
MIME Type:
Creator:
Colm
Created:
2011-10-04 20:48:33 UTC
Size:
9.84 KB
patch
obsolete
> .file "TestEigen.cpp" > .text > .p2align 4,,15 > .def __tcf_0; .scl 3; .type 32; .endef > .seh_proc __tcf_0 >__tcf_0: > .seh_endprologue > leaq _ZStL8__ioinit(%rip), %rcx > jmp _ZNSt8ios_base4InitD1Ev > .seh_endproc > .section .text$_ZNK5Eigen17CoeffBasedProductIRKNS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEES4_Li6EEcvS4_Ev,"x" > .linkonce discard > .align 2 > .p2align 4,,15 > .globl _ZNK5Eigen17CoeffBasedProductIRKNS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEES4_Li6EEcvS4_Ev > .def _ZNK5Eigen17CoeffBasedProductIRKNS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEES4_Li6EEcvS4_Ev; .scl 2; .type 32; .endef > .seh_proc _ZNK5Eigen17CoeffBasedProductIRKNS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEES4_Li6EEcvS4_Ev >_ZNK5Eigen17CoeffBasedProductIRKNS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEES4_Li6EEcvS4_Ev: > .seh_endprologue > movq 8(%rcx), %r8 > movq (%rcx), %rdx > leaq 16(%rcx), %rax > movsd (%r8), %xmm1 > movsd 8(%r8), %xmm0 > unpcklpd %xmm1, %xmm1 > unpcklpd %xmm0, %xmm0 > mulpd (%rdx), %xmm1 > mulpd 32(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movsd 16(%r8), %xmm1 > unpcklpd %xmm1, %xmm1 > mulpd 64(%rdx), %xmm1 > addpd %xmm0, %xmm1 > movsd 24(%r8), %xmm0 > unpcklpd %xmm0, %xmm0 > mulpd 96(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movapd %xmm0, 16(%rcx) > movsd (%r8), %xmm1 > movsd 8(%r8), %xmm0 > unpcklpd %xmm1, %xmm1 > unpcklpd %xmm0, %xmm0 > mulpd 16(%rdx), %xmm1 > mulpd 48(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movsd 16(%r8), %xmm1 > unpcklpd %xmm1, %xmm1 > mulpd 80(%rdx), %xmm1 > addpd %xmm0, %xmm1 > movsd 24(%r8), %xmm0 > unpcklpd %xmm0, %xmm0 > mulpd 112(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movapd %xmm0, 32(%rcx) > movsd 32(%r8), %xmm1 > movsd 40(%r8), %xmm0 > unpcklpd %xmm1, %xmm1 > unpcklpd %xmm0, %xmm0 > mulpd (%rdx), %xmm1 > mulpd 32(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movsd 48(%r8), %xmm1 > unpcklpd %xmm1, %xmm1 > mulpd 64(%rdx), %xmm1 > addpd %xmm0, %xmm1 > movsd 56(%r8), %xmm0 > unpcklpd %xmm0, %xmm0 > mulpd 96(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movapd %xmm0, 48(%rcx) > movsd 32(%r8), %xmm1 > movsd 40(%r8), %xmm0 > unpcklpd %xmm1, %xmm1 > unpcklpd %xmm0, %xmm0 > mulpd 16(%rdx), %xmm1 > mulpd 48(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movsd 48(%r8), %xmm1 > unpcklpd %xmm1, %xmm1 > mulpd 80(%rdx), %xmm1 > addpd %xmm0, %xmm1 > movsd 56(%r8), %xmm0 > unpcklpd %xmm0, %xmm0 > mulpd 112(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movapd %xmm0, 64(%rcx) > movsd 64(%r8), %xmm1 > movsd 72(%r8), %xmm0 > unpcklpd %xmm1, %xmm1 > unpcklpd %xmm0, %xmm0 > mulpd (%rdx), %xmm1 > mulpd 32(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movsd 80(%r8), %xmm1 > unpcklpd %xmm1, %xmm1 > mulpd 64(%rdx), %xmm1 > addpd %xmm0, %xmm1 > movsd 88(%r8), %xmm0 > unpcklpd %xmm0, %xmm0 > mulpd 96(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movapd %xmm0, 80(%rcx) > movsd 64(%r8), %xmm1 > movsd 72(%r8), %xmm0 > unpcklpd %xmm1, %xmm1 > unpcklpd %xmm0, %xmm0 > mulpd 16(%rdx), %xmm1 > mulpd 48(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movsd 80(%r8), %xmm1 > unpcklpd %xmm1, %xmm1 > mulpd 80(%rdx), %xmm1 > addpd %xmm0, %xmm1 > movsd 88(%r8), %xmm0 > unpcklpd %xmm0, %xmm0 > mulpd 112(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movapd %xmm0, 96(%rcx) > movsd 96(%r8), %xmm1 > movsd 104(%r8), %xmm0 > unpcklpd %xmm1, %xmm1 > unpcklpd %xmm0, %xmm0 > mulpd (%rdx), %xmm1 > mulpd 32(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movsd 112(%r8), %xmm1 > unpcklpd %xmm1, %xmm1 > mulpd 64(%rdx), %xmm1 > addpd %xmm0, %xmm1 > movsd 120(%r8), %xmm0 > unpcklpd %xmm0, %xmm0 > mulpd 96(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movapd %xmm0, 112(%rcx) > movsd 96(%r8), %xmm1 > movsd 104(%r8), %xmm0 > unpcklpd %xmm1, %xmm1 > unpcklpd %xmm0, %xmm0 > mulpd 16(%rdx), %xmm1 > mulpd 48(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movsd 112(%r8), %xmm1 > unpcklpd %xmm1, %xmm1 > mulpd 80(%rdx), %xmm1 > addpd %xmm0, %xmm1 > movsd 120(%r8), %xmm0 > unpcklpd %xmm0, %xmm0 > mulpd 112(%rdx), %xmm0 > addpd %xmm1, %xmm0 > movapd %xmm0, 128(%rcx) > ret > .seh_endproc > .section .text$_ZN5Eigen8internal40assign_LinearTraversal_CompleteUnrollingINS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEENS_14CwiseNullaryOpINS0_16scalar_random_opIdEES3_EELi3ELi16EE3runERS3_RKS7_,"x" > .linkonce discard > .p2align 4,,15 > .globl _ZN5Eigen8internal40assign_LinearTraversal_CompleteUnrollingINS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEENS_14CwiseNullaryOpINS0_16scalar_random_opIdEES3_EELi3ELi16EE3runERS3_RKS7_ > .def _ZN5Eigen8internal40assign_LinearTraversal_CompleteUnrollingINS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEENS_14CwiseNullaryOpINS0_16scalar_random_opIdEES3_EELi3ELi16EE3runERS3_RKS7_; .scl 2; .type 32; .endef > .seh_proc _ZN5Eigen8internal40assign_LinearTraversal_CompleteUnrollingINS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEENS_14CwiseNullaryOpINS0_16scalar_random_opIdEES3_EELi3ELi16EE3runERS3_RKS7_ >_ZN5Eigen8internal40assign_LinearTraversal_CompleteUnrollingINS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEENS_14CwiseNullaryOpINS0_16scalar_random_opIdEES3_EELi3ELi16EE3runERS3_RKS7_: > pushq %rbx > .seh_pushreg %rbx > subq $64, %rsp > .seh_stackalloc 64 > movaps %xmm6, 32(%rsp) > .seh_savexmm %xmm6, 32 > movaps %xmm7, 48(%rsp) > .seh_savexmm %xmm7, 48 > .seh_endprologue > movq %rcx, %rbx > call rand > cvtsi2sd %eax, %xmm0 > movsd .LC0(%rip), %xmm7 > movsd .LC1(%rip), %xmm6 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 24(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 32(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 40(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 48(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 56(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 64(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 72(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 80(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 88(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 96(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 104(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 112(%rbx) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > movaps 48(%rsp), %xmm7 > subsd %xmm6, %xmm0 > movaps 32(%rsp), %xmm6 > movsd %xmm0, 120(%rbx) > addq $64, %rsp > popq %rbx > ret > .seh_endproc > .def __main; .scl 2; .type 32; .endef > .section .rdata,"dr" >.LC3: > .ascii "Time taken: \0" > .section .text.startup,"x" > .p2align 4,,15 > .globl main > .def main; .scl 2; .type 32; .endef > .seh_proc main >main: > pushq %rsi > .seh_pushreg %rsi > pushq %rbx > .seh_pushreg %rbx > subq $632, %rsp > .seh_stackalloc 632 > movaps %xmm6, 592(%rsp) > .seh_savexmm %xmm6, 592 > movaps %xmm7, 608(%rsp) > .seh_savexmm %xmm7, 608 > .seh_endprologue > call __main > call rand > cvtsi2sd %eax, %xmm0 > leaq 320(%rsp), %rbx > movsd .LC0(%rip), %xmm7 > movsd .LC1(%rip), %xmm6 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 320(%rsp) > call rand > cvtsi2sd %eax, %xmm0 > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 328(%rsp) > call rand > cvtsi2sd %eax, %xmm0 > leaq 576(%rsp), %rdx > movq %rbx, %rcx > addsd %xmm0, %xmm0 > divsd %xmm7, %xmm0 > subsd %xmm6, %xmm0 > movsd %xmm0, 336(%rsp) > call _ZN5Eigen8internal40assign_LinearTraversal_CompleteUnrollingINS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEENS_14CwiseNullaryOpINS0_16scalar_random_opIdEES3_EELi3ELi16EE3runERS3_RKS7_ > leaq 32(%rsp), %rcx > movq %rbx, 32(%rsp) > movq %rbx, 40(%rsp) > call _ZNK5Eigen17CoeffBasedProductIRKNS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEES4_Li6EEcvS4_Ev > movapd (%rax), %xmm0 > leaq 176(%rsp), %rcx > movapd %xmm0, 448(%rsp) > movapd 16(%rax), %xmm0 > movapd %xmm0, 464(%rsp) > movapd 32(%rax), %xmm0 > movapd %xmm0, 480(%rsp) > movapd 48(%rax), %xmm0 > movapd %xmm0, 496(%rsp) > movapd 64(%rax), %xmm0 > movapd %xmm0, 512(%rsp) > movapd 80(%rax), %xmm0 > movapd %xmm0, 528(%rsp) > movapd 96(%rax), %xmm0 > movapd %xmm0, 544(%rsp) > movapd 112(%rax), %xmm0 > leaq 448(%rsp), %rax > movapd %xmm0, 560(%rsp) > movq %rax, 176(%rsp) > movq %rax, 184(%rsp) > call _ZNK5Eigen17CoeffBasedProductIRKNS_6MatrixIdLi4ELi4ELi0ELi4ELi4EEES4_Li6EEcvS4_Ev > call clock > movl %eax, %ebx >/APP > # 26 ".\TestEigen.cpp" 1 > #mybegin > # 0 "" 2 >/NO_APP > movsd .LC2(%rip), %xmm1 > xorl %eax, %eax > .p2align 4,,10 >.L5: > addl $1, %eax > cvtsi2sd %eax, %xmm0 > ucomisd %xmm0, %xmm1 > ja .L5 >/APP > # 30 ".\TestEigen.cpp" 1 > #myend > # 0 "" 2 >/NO_APP > call clock > leaq .LC3(%rip), %rdx > movl %eax, %esi > leaq _ZSt4cout(%rip), %rcx > subl %ebx, %esi > call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc > cvtsi2sd %esi, %xmm1 > movq %rax, %rcx > divsd .LC4(%rip), %xmm1 > call _ZNSo9_M_insertIdEERSoT_ > movq %rax, %rcx > call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_ > nop > movaps 592(%rsp), %xmm6 > xorl %eax, %eax > movaps 608(%rsp), %xmm7 > addq $632, %rsp > popq %rbx > popq %rsi > ret > .seh_endproc > .p2align 4,,15 > .def _GLOBAL__sub_I_main; .scl 3; .type 32; .endef > .seh_proc _GLOBAL__sub_I_main >_GLOBAL__sub_I_main: > subq $40, %rsp > .seh_stackalloc 40 > .seh_endprologue > leaq _ZStL8__ioinit(%rip), %rcx > call _ZNSt8ios_base4InitC1Ev > leaq __tcf_0(%rip), %rcx > addq $40, %rsp > jmp atexit > .seh_endproc > .section .ctors,"w" > .align 8 > .quad _GLOBAL__sub_I_main >.lcomm _ZStL8__ioinit,1,1 > .section .rdata,"dr" > .align 8 >.LC0: > .long 0 > .long 1088421824 > .align 8 >.LC1: > .long 0 > .long 1072693248 > .align 8 >.LC2: > .long 0 > .long 1097011920 > .align 8 >.LC4: > .long 0 > .long 1083129856 > .def _ZNSt8ios_base4InitD1Ev; .scl 2; .type 32; .endef > .def rand; .scl 2; .type 32; .endef > .def clock; .scl 2; .type 32; .endef > .def _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc; .scl 2; .type 32; .endef > .def _ZNSo9_M_insertIdEERSoT_; .scl 2; .type 32; .endef > .def _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_; .scl 2; .type 32; .endef > .def _ZNSt8ios_base4InitC1Ev; .scl 2; .type 32; .endef > .def atexit; .scl 2; .type 32; .endef
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 357
: 216 |
217