Monday, 25 November 2013

Misunderstanding of C++ 01 -- C++ must be slower than C because C++ always constructs and destructs objects

    Unlike C, C++ supports constructors and destructors, which are powerful tools for resource management. Many (or some?) C programmers claim that constructors and destructors kill performance; they don't want to carry that burden, they want full control, and that seems like an impossible mission in C++. Is this true? Absolutely not.

    What mistakes are they making?

1 : We do not have to pay the cost of a constructor and a destructor; it is not mandatory. In other words, if your struct or class satisfies the requirements for a trivial constructor and a trivial destructor, your program will not invoke any constructor or destructor.

A class or struct has a trivial constructor if

A constructor of a class A is trivial if all the following are true:
  • It is implicitly defined
  • A has no virtual functions and no virtual base classes
  • All the direct base classes of A have trivial constructors
  • The classes of all the nonstatic data members of A have trivial constructors
If any of the above are false, then the constructor is nontrivial.

A class or struct has a trivial destructor if

A destructor of a class A is trivial if all the following are true:
  • It is implicitly defined
  • All the direct base classes of A have trivial destructors
  • The classes of all the nonstatic data members of A have trivial destructors
If any of the above are false, then the destructor is nontrivial.


  All POD types have a trivial constructor and a trivial destructor.

2 :  You think constructors and destructors cost you something, but in truth they rarely do (compared with the equivalent C behavior)


  In theory, we don't have to pay the cost of constructors and destructors; moreover, they rarely cost us anything. Theory is good, but let us fall back to reality: can the compiler get the job done? If you are not sure about it, measure rather than guess about performance.

  The following code was compiled with clang 3.3 and clang++ 3.3 on Mac OS X 10.8.5 (Mac mini, Intel CPU).


High-level code and the associated assembly of case 1
 
 

c codes
/* Plain aggregate of three scalar members; no setup or teardown code is
   attached to it. */
struct trivialStruct
{
  int    a;
  float  b;
  double c;
};

/* Declares one uninitialised trivialStruct and returns 0; nothing else is
   done with A in this snippet. */
/* NOTE(review): the assembly listing shown for this file calls printf with
   the format string "%d, %f, %f", which does not appear here -- the printf
   line seems to have been dropped from the snippet; confirm against the
   original source file. */
int main()
{
  struct trivialStruct A;  

  return 0;
}

assembly generated by command "clang -S -O2 -mllvm --x86-asm-syntax=intel trivialConstructDestruct00.c"
 .section __TEXT,__text,regular,pure_instructions
 .globl _main
 .align 4, 0x90
 ## main: the only function emitted for this translation unit. No constructor
 ## or destructor symbol is defined or called anywhere in the listing.
_main:                                  ## @main
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp2:
 .cfi_def_cfa_offset 16
Ltmp3:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp4:
 .cfi_def_cfa_register rbp
 ## printf call: only the format-string pointer and AL are set up. ESI, XMM0
 ## and XMM1 are never loaded before the call in this function.
 lea RDI, QWORD PTR [RIP + L_.str]      ## first argument: address of the format string
 mov AL, 2                              ## variadic call: two vector registers are counted as used
 call _printf
 xor EAX, EAX                           ## return value 0
 pop RBP
 ret
 .cfi_endproc

 .section __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
 .asciz  "%d, %f, %f"


.subsections_via_symbols


c++ codes
// Plain aggregate of three scalar members. It declares no constructor or
// destructor, no virtual functions and no base classes.
struct trivialStruct
{
  int    a;
  float  b;
  double c;
};

// Declares one uninitialised trivialStruct and returns 0; nothing else is
// done with A in this snippet.
// NOTE(review): the assembly listing shown for this file calls printf with
// the format string "%d, %f, %f", which does not appear here -- the printf
// line seems to have been dropped from the snippet; confirm against the
// original source file.
int main()
{
  trivialStruct A;  

  return 0;
}

assembly generated by command "clang++ -S -O2 -mllvm --x86-asm-syntax=intel trivialConstructDestruct00.cpp"
 .section __TEXT,__text,regular,pure_instructions
 .globl _main
 .align 4, 0x90
 ## main as compiled by clang++: the instruction sequence is the same as in
 ## the C listing for case 1, and no constructor or destructor symbol is
 ## defined or called.
_main:                                  ## @main
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp2:
 .cfi_def_cfa_offset 16
Ltmp3:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp4:
 .cfi_def_cfa_register rbp
 ## printf call: only the format-string pointer and AL are set up. ESI, XMM0
 ## and XMM1 are never loaded before the call in this function.
 lea RDI, QWORD PTR [RIP + L_.str]      ## first argument: address of the format string
 mov AL, 2                              ## variadic call: two vector registers are counted as used
 call _printf
 xor EAX, EAX                           ## return value 0
 pop RBP
 ret
 .cfi_endproc

 .section __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
 .asciz  "%d, %f, %f"


.subsections_via_symbols



  Evidently, the claim that C++ always has to pay the cost of constructors and destructors is false; it is a misunderstanding that comes from FUD.

High-level code and the associated assembly of case 2

c

#include <stdio.h>
#include <stdlib.h>

/* Holds three pointers; in this example each one is pointed at a single
   heap-allocated value by construct_trivial_struct. */
struct trivialStruct
{
  int   *a;
  float *b;
  float *c;
};

/*
 * Hand-written "constructor": allocates one element for each of the three
 * pointer members of *data and stores 100, 200 and 300 in them.
 *
 * The malloc results are dereferenced without a NULL check.
 */
void construct_trivial_struct(struct trivialStruct *data)
{
  /* Allocate in member order: a, b, c. */
  data->a = (int*)malloc(sizeof *data->a);
  data->b = (float*)malloc(sizeof *data->b);
  data->c = (float*)malloc(sizeof *data->c);

  /* Fill in the initial values only after all three allocations. */
  data->a[0] = 100;
  data->b[0] = 200;
  data->c[0] = 300;
}

/*
 * Hand-written "destructor": frees the three buffers owned by *data and
 * then resets every member to NULL.
 */
void destruct_trivial_struct(struct trivialStruct *data)
{
  struct trivialStruct *const self = data;

  /* Release in member order: a, b, c. */
  free(self->a);
  free(self->b);
  free(self->c);

  /* Clear the members once all three buffers have been released. */
  self->a = NULL;
  self->b = NULL;
  self->c = NULL;
}

/* Builds one trivialStruct by hand, prints its three values and tears it
   down again. */
int main()
{
  struct trivialStruct A;

  /* Manual "constructor" call. */
  construct_trivial_struct(&A);

  printf("%d, %f, %f", A.a[0], A.b[0], A.c[0]);

  /* Manual "destructor" call. */
  destruct_trivial_struct(&A);
  return 0;
}


assembly generated by command "clang -S -O2 -mllvm --x86-asm-syntax=intel trivialConstructDestruct00.c"
 .section __TEXT,__text,regular,pure_instructions
 .globl _construct_trivial_struct
 .align 4, 0x90
 ## construct_trivial_struct(struct trivialStruct *data)
 ## In:  RDI = data
 ## Three malloc(4) calls; the results are stored at data+0, data+8 and
 ## data+16, then 100 / 200.0f / 300.0f are written through them.
_construct_trivial_struct:              ## @construct_trivial_struct
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp3:
 .cfi_def_cfa_offset 16
Ltmp4:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp5:
 .cfi_def_cfa_register rbp
 push R15
 push R14
 push RBX
 push RAX                               ## fifth push: keeps RSP 16-byte aligned at the calls below
Ltmp6:
 .cfi_offset rbx, -40
Ltmp7:
 .cfi_offset r14, -32
Ltmp8:
 .cfi_offset r15, -24
 mov RBX, RDI                           ## keep data in a callee-saved register across the calls
 mov EDI, 4
 call _malloc
 mov R14, RAX                           ## first allocation, kept for the store of 100 below
 mov QWORD PTR [RBX], R14               ## data->a
 mov EDI, 4
 call _malloc
 mov R15, RAX                           ## second allocation, kept for the store of 200.0f below
 mov QWORD PTR [RBX + 8], R15           ## data->b
 mov EDI, 4
 call _malloc
 mov QWORD PTR [RBX + 16], RAX          ## data->c
 mov DWORD PTR [R14], 100
 mov DWORD PTR [R15], 1128792064        ## 0x43480000, the bit pattern of 200.0f
 mov DWORD PTR [RAX], 1133903872        ## 0x43960000, the bit pattern of 300.0f
 add RSP, 8                             ## discard the alignment slot pushed as RAX
 pop RBX
 pop R14
 pop R15
 pop RBP
 ret
 .cfi_endproc

 .globl _destruct_trivial_struct
 .align 4, 0x90
 ## destruct_trivial_struct(struct trivialStruct *data)
 ## In:  RDI = data
 ## Frees the pointers stored at data+0, data+8 and data+16, then writes 0
 ## to all three slots.
_destruct_trivial_struct:               ## @destruct_trivial_struct
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp12:
 .cfi_def_cfa_offset 16
Ltmp13:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp14:
 .cfi_def_cfa_register rbp
 push RBX
 push RAX                               ## third push: keeps RSP 16-byte aligned at the calls below
Ltmp15:
 .cfi_offset rbx, -24
 mov RBX, RDI                           ## keep data in a callee-saved register across the calls
 mov RDI, QWORD PTR [RBX]               ## data->a
 call _free
 mov RDI, QWORD PTR [RBX + 8]           ## data->b
 call _free
 mov RDI, QWORD PTR [RBX + 16]          ## data->c
 call _free
 ## The three NULL stores are emitted after the last call, highest offset first.
 mov QWORD PTR [RBX + 16], 0
 mov QWORD PTR [RBX + 8], 0
 mov QWORD PTR [RBX], 0
 add RSP, 8                             ## discard the alignment slot pushed as RAX
 pop RBX
 pop RBP
 ret
 .cfi_endproc

 .section __TEXT,__literal8,8byte_literals
 .align 3
 ## 8-byte constants loaded into XMM0 / XMM1 by main below.
LCPI2_0:
 .quad 4641240890982006784     ## double 200
LCPI2_1:
 .quad 4643985272004935680     ## double 300
 .section __TEXT,__text,regular,pure_instructions
 .globl _main
 .align 4, 0x90
 ## main: contains no call to _construct_trivial_struct,
 ## _destruct_trivial_struct, _malloc or _free. The values 100, 200 and 300
 ## are passed to printf as constants.
_main:                                  ## @main
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp18:
 .cfi_def_cfa_offset 16
Ltmp19:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp20:
 .cfi_def_cfa_register rbp
 lea RDI, QWORD PTR [RIP + L_.str]      ## first argument: address of the format string
 movsd XMM0, QWORD PTR [RIP + LCPI2_0]  ## first floating-point argument: 200
 movsd XMM1, QWORD PTR [RIP + LCPI2_1]  ## second floating-point argument: 300
 mov ESI, 100                           ## second integer argument: 100
 mov AL, 2                              ## variadic call: two vector registers used (XMM0, XMM1)
 call _printf
 xor EAX, EAX                           ## return value 0
 pop RBP
 ret
 .cfi_endproc

 .section __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
 .asciz  "%d, %f, %f"


.subsections_via_symbols
c++
#include <stdio.h>
#include <stdlib.h>

// Holds three pointers. The user-declared constructor and destructor are
// defined out of line.
struct trivialStruct
{
  // Data members, in declaration (and therefore initialisation) order.
  int   *a;
  float *b;
  float *c;

  // Special member functions.
  trivialStruct();
  ~trivialStruct();
};

// Constructor: allocates one element for each pointer member in the
// initialiser list, then stores 100, 200 and 300 through them.
// The malloc results are dereferenced without a null check.
trivialStruct::trivialStruct()
  : a(static_cast<int*>(malloc(sizeof(int))))
  , b(static_cast<float*>(malloc(sizeof(float))))
  , c(static_cast<float*>(malloc(sizeof(float))))
{
  a[0] = 100;
  b[0] = 200;
  c[0] = 300;
}

// Destructor: frees the three buffers in member order, then resets every
// member to nullptr.
trivialStruct::~trivialStruct()
{
  free(this->a);
  free(this->b);
  free(this->c);

  this->a = nullptr;
  this->b = nullptr;
  this->c = nullptr;
}

// Creates one trivialStruct and prints its three values. The constructor
// runs at the declaration of A; the destructor runs when A goes out of scope.
int main()
{
  trivialStruct A;

  printf("%d, %f, %f", A.a[0], A.b[0], A.c[0]);
  return 0;
}

 .section __TEXT,__text,regular,pure_instructions
 .globl __ZN13trivialStructC1Ev
 .align 4, 0x90
 ## trivialStruct::trivialStruct() -- the C1 ("complete object") constructor
 ## symbol in Itanium C++ ABI name mangling.
 ## In:  RDI = this
 ## The instruction sequence is the same as _construct_trivial_struct in the
 ## C listing for case 2.
__ZN13trivialStructC1Ev:                ## @_ZN13trivialStructC1Ev
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp3:
 .cfi_def_cfa_offset 16
Ltmp4:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp5:
 .cfi_def_cfa_register rbp
 push R15
 push R14
 push RBX
 push RAX                               ## fifth push: keeps RSP 16-byte aligned at the calls below
Ltmp6:
 .cfi_offset rbx, -40
Ltmp7:
 .cfi_offset r14, -32
Ltmp8:
 .cfi_offset r15, -24
 mov RBX, RDI                           ## keep this in a callee-saved register across the calls
 mov EDI, 4
 call _malloc
 mov R14, RAX                           ## first allocation, kept for the store of 100 below
 mov QWORD PTR [RBX], R14               ## this->a
 mov EDI, 4
 call _malloc
 mov R15, RAX                           ## second allocation, kept for the store of 200.0f below
 mov QWORD PTR [RBX + 8], R15           ## this->b
 mov EDI, 4
 call _malloc
 mov QWORD PTR [RBX + 16], RAX          ## this->c
 mov DWORD PTR [R14], 100
 mov DWORD PTR [R15], 1128792064        ## 0x43480000, the bit pattern of 200.0f
 mov DWORD PTR [RAX], 1133903872        ## 0x43960000, the bit pattern of 300.0f
 add RSP, 8                             ## discard the alignment slot pushed as RAX
 pop RBX
 pop R14
 pop R15
 pop RBP
 ret
 .cfi_endproc

 .globl __ZN13trivialStructC2Ev
 .align 4, 0x90
 ## trivialStruct::trivialStruct() -- the C2 ("base object") constructor
 ## symbol in Itanium C++ ABI name mangling.
 ## In:  RDI = this
 ## Same instruction sequence as __ZN13trivialStructC1Ev; only the Ltmp
 ## label numbers differ.
__ZN13trivialStructC2Ev:                ## @_ZN13trivialStructC2Ev
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp12:
 .cfi_def_cfa_offset 16
Ltmp13:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp14:
 .cfi_def_cfa_register rbp
 push R15
 push R14
 push RBX
 push RAX                               ## fifth push: keeps RSP 16-byte aligned at the calls below
Ltmp15:
 .cfi_offset rbx, -40
Ltmp16:
 .cfi_offset r14, -32
Ltmp17:
 .cfi_offset r15, -24
 mov RBX, RDI                           ## keep this in a callee-saved register across the calls
 mov EDI, 4
 call _malloc
 mov R14, RAX                           ## first allocation, kept for the store of 100 below
 mov QWORD PTR [RBX], R14               ## this->a
 mov EDI, 4
 call _malloc
 mov R15, RAX                           ## second allocation, kept for the store of 200.0f below
 mov QWORD PTR [RBX + 8], R15           ## this->b
 mov EDI, 4
 call _malloc
 mov QWORD PTR [RBX + 16], RAX          ## this->c
 mov DWORD PTR [R14], 100
 mov DWORD PTR [R15], 1128792064        ## 0x43480000, the bit pattern of 200.0f
 mov DWORD PTR [RAX], 1133903872        ## 0x43960000, the bit pattern of 300.0f
 add RSP, 8                             ## discard the alignment slot pushed as RAX
 pop RBX
 pop R14
 pop R15
 pop RBP
 ret
 .cfi_endproc

 .globl __ZN13trivialStructD1Ev
 .align 4, 0x90
 ## trivialStruct::~trivialStruct() -- the D1 ("complete object") destructor
 ## symbol in Itanium C++ ABI name mangling.
 ## In:  RDI = this
 ## The instruction sequence is the same as _destruct_trivial_struct in the
 ## C listing for case 2.
__ZN13trivialStructD1Ev:                ## @_ZN13trivialStructD1Ev
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp21:
 .cfi_def_cfa_offset 16
Ltmp22:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp23:
 .cfi_def_cfa_register rbp
 push RBX
 push RAX                               ## third push: keeps RSP 16-byte aligned at the calls below
Ltmp24:
 .cfi_offset rbx, -24
 mov RBX, RDI                           ## keep this in a callee-saved register across the calls
 mov RDI, QWORD PTR [RBX]               ## this->a
 call _free
 mov RDI, QWORD PTR [RBX + 8]           ## this->b
 call _free
 mov RDI, QWORD PTR [RBX + 16]          ## this->c
 call _free
 ## The three nullptr stores are emitted after the last call, highest offset first.
 mov QWORD PTR [RBX + 16], 0
 mov QWORD PTR [RBX + 8], 0
 mov QWORD PTR [RBX], 0
 add RSP, 8                             ## discard the alignment slot pushed as RAX
 pop RBX
 pop RBP
 ret
 .cfi_endproc

 .globl __ZN13trivialStructD2Ev
 .align 4, 0x90
 ## trivialStruct::~trivialStruct() -- the D2 ("base object") destructor
 ## symbol in Itanium C++ ABI name mangling.
 ## In:  RDI = this
 ## Same instruction sequence as __ZN13trivialStructD1Ev; only the Ltmp
 ## label numbers differ.
__ZN13trivialStructD2Ev:                ## @_ZN13trivialStructD2Ev
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp28:
 .cfi_def_cfa_offset 16
Ltmp29:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp30:
 .cfi_def_cfa_register rbp
 push RBX
 push RAX                               ## third push: keeps RSP 16-byte aligned at the calls below
Ltmp31:
 .cfi_offset rbx, -24
 mov RBX, RDI                           ## keep this in a callee-saved register across the calls
 mov RDI, QWORD PTR [RBX]               ## this->a
 call _free
 mov RDI, QWORD PTR [RBX + 8]           ## this->b
 call _free
 mov RDI, QWORD PTR [RBX + 16]          ## this->c
 call _free
 ## The three nullptr stores are emitted after the last call, highest offset first.
 mov QWORD PTR [RBX + 16], 0
 mov QWORD PTR [RBX + 8], 0
 mov QWORD PTR [RBX], 0
 add RSP, 8                             ## discard the alignment slot pushed as RAX
 pop RBX
 pop RBP
 ret
 .cfi_endproc

 .section __TEXT,__literal8,8byte_literals
 .align 3
 ## 8-byte constants loaded into XMM0 / XMM1 by main below.
LCPI4_0:
 .quad 4641240890982006784     ## double 200
LCPI4_1:
 .quad 4643985272004935680     ## double 300
 .section __TEXT,__text,regular,pure_instructions
 .globl _main
 .align 4, 0x90
 ## main: contains no call to any of the four constructor/destructor symbols
 ## above, nor to _malloc or _free. The values 100, 200 and 300 are passed
 ## to printf as constants, exactly as in the C listing for case 2.
_main:                                  ## @main
 .cfi_startproc
## BB#0:                                ## %entry
 push RBP
Ltmp34:
 .cfi_def_cfa_offset 16
Ltmp35:
 .cfi_offset rbp, -16
 mov RBP, RSP
Ltmp36:
 .cfi_def_cfa_register rbp
 lea RDI, QWORD PTR [RIP + L_.str]      ## first argument: address of the format string
 movsd XMM0, QWORD PTR [RIP + LCPI4_0]  ## first floating-point argument: 200
 movsd XMM1, QWORD PTR [RIP + LCPI4_1]  ## second floating-point argument: 300
 mov ESI, 100                           ## second integer argument: 100
 mov AL, 2                              ## variadic call: two vector registers used (XMM0, XMM1)
 call _printf
 xor EAX, EAX                           ## return value 0
 pop RBP
 ret
 .cfi_endproc

 .section __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
 .asciz  "%d, %f, %f"


.subsections_via_symbols

    We can see that the construction and destruction parts are the same as the assembly generated from the C code. Although the compiler generates two copies of the constructor and of the destructor for us, this does not mean the code becomes fatter or slower, because the linker can remove the duplicated code for us. Even if the duplicates are not removed (hard to believe this would happen with a modern linker), they only take up some space in memory and are never executed.

    One way to verify this is to separate the declaration and the definition of the struct, generate the assembly with the same command, and look at the code: you will find that the assembly only calls one constructor symbol. The final step is to verify the size of the executable; if the linker has not removed the duplicated code, the executable will be larger. Another option is to download a disassembler and analyze the executable yourself.

    In conclusion, most of the time we have to construct or destruct the object anyway, so constructors and destructors rarely cost us anything (compared with the equivalent C behavior). Even if you really don't want them, you can disable them as you wish; nothing stops you from gaining full control in C++.

    The code is available on GitHub.