处理器/DSP
寄存器变量的存取速度比普通变量快。对于C程序,寄存器变量不能取地址,否则编译器会报错。对于C++程序,可以对寄存器变量进行取址操作,编译器不会报错,但是取出来的地址似乎不是寄存器地址,而是内存地址,不知道是不是C++编译器在涉及取址运算时将寄存器变量自动转换成普通变量来处理。(注:在C++中,register只是给编译器的优化提示;一旦对变量取址,编译器通常会忽略该提示,把它当作普通变量存放在内存中。该关键字已在C++11中被弃用,并在C++17中被移除。)
1、只有普通运算
对于上述的普通累加运算而言,采用普通变量耗时0.7177秒,采用寄存器变量耗时0.111秒,速度上确实有明显的差别。
2、涉及取址运算
如果涉及取址运算,采用普通变量耗时0.7867秒,采用寄存器变量耗时0.4792秒,速度上的差别就没有那么显著了。大家可以发现两种变量取出的地址分别是0x6ffe38和0x6ffe3c,是连续的两个地址,那都是内存地址。不能确定,是不是C++编译器在涉及取址运算时自动将寄存器变量当成普通变量来处理。
实际使用时,底层硬件环境的实际情况对寄存器变量的使用会有一些限制。每个函数中只有很少的变量可以保存在寄存器中,且只允许某些类型的变量。但是,过量的寄存器声明并没有什么害处,这是因为编译器可以忽略过量的或者不支持的寄存器变量声明。另外,无论寄存器变量实际上是不是存放在寄存器中,它的地址都是不能访问的。在不同的机器中,对寄存器变量的数目和类型的具体限制也是不同的。 ——《C程序设计语言(第二版)》,Brian W. Kernighan & Dennis M. Ritchie
对于C程序,寄存器变量是不能取址的:
God一直致力于研究高并发服务端的开发,这次要优化的是libGod库中的线程本地存储变量。线程本地存储变量访问非常频繁,优化后库的性能应该会提高不少。已知的线程本地存储方法有boost中的thread_specific_ptr类、gcc中的__thread关键字,以及pthread中的pthread_getspecific函数。这次测试这3种本地存储以及普通变量之间的性能差别,代码如下:
#include <pthread.h>
#include <stdio.h>
#include <sys/time.h>

#include <iostream>

#include <boost/thread/thread.hpp>
#include <boost/thread/tss.hpp>
using namespace std;
// Minimal payload type stored in each thread-local slot. It logs its own
// construction and destruction so the lifetime of the per-thread instance is
// visible in the program output.
class C {
public:
    // Stores `a` as the instance tag and prints "C() <a>".
    // Intentionally left non-explicit so existing call sites keep compiling.
    C(int a) : m_a(a) {
        printf("C() %d\n", m_a);
    }

    // Prints "~C() <tag>" so that a missing line in the output reveals a leak.
    ~C() {
        printf("~C() %d\n", m_a);
    }

private:
    int m_a;  // Tag echoed by both log lines.
};
// Selects the storage mechanism that the benchmark exercises:
//   1          -> boost::thread_specific_ptr
//   2          -> GCC __thread
//   3          -> pthread key (pthread_getspecific)
//   otherwise  -> ordinary global pointer (baseline)
// Defaults to 3; may be overridden at build time, e.g. -DTM=2.
#ifndef TM
#define TM 3
#endif

// `pc` is the per-thread slot under test and `testType` is the label printed
// with the benchmark result.
#if TM == 1
boost::thread_specific_ptr<C> pc;
const char *testType = "boost";
#elif TM == 2
__thread C *pc;
const char *testType = "__thread";
#elif TM == 3
pthread_key_t pc;
const char *testType = "pthread";
#else
C *pc;
const char *testType = "normal";
#endif
void boostthreadFunc() {
#if TM == 1
pc.reset(new C(10));
#elif TM == 2
pc = new C(20);
#elif TM == 3
if (pthread_key_create(&pc, NULL)) {
cout 《《 “pthread_key_create” 《《 endl;
return;
}
if (pthread_setspecific(pc, new C(30))) {
cout 《《 “pthread_setspecific” 《《 endl;
return;
}
#else
pc = new C(20);
#endif
int switches = 5000000;
int i = switches;
struct timeval tm_start, tm_end;
gettimeofday(&tm_start, NULL);
while (i--) {
#if TM == 1
C *c1 = pc.get();
C *c2 = pc.get();
C *c3 = pc.get();
C *c4 = pc.get();
C *c5 = pc.get();
C *c6 = pc.get();
C *c7 = pc.get();
C *c8 = pc.get();
C *c9 = pc.get();
C *c10 = pc.get();
C *c11 = pc.get();
C *c12 = pc.get();
C *c13 = pc.get();
C *c14 = pc.get();
C *c15 = pc.get();
C *c16 = pc.get();
C *c17 = pc.get();
C *c18 = pc.get();
C *c19 = pc.get();
C *c20 = pc.get();
C *c21 = pc.get();
C *c22 = pc.get();
C *c23 = pc.get();
C *c24 = pc.get();
C *c25 = pc.get();
C *c26 = pc.get();
C *c27 = pc.get();
C *c28 = pc.get();
C *c29 = pc.get();
C *c30 = pc.get();
C *c31 = pc.get();
C *c32 = pc.get();
C *c33 = pc.get();
C *c34 = pc.get();
C *c35 = pc.get();
C *c36 = pc.get();
C *c37 = pc.get();
C *c38 = pc.get();
C *c39 = pc.get();
C *c40 = pc.get();
#elif TM == 2
C *c1 = pc;
C *c2 = pc;
C *c3 = pc;
C *c4 = pc;
C *c5 = pc;
C *c6 = pc;
C *c7 = pc;
C *c8 = pc;
C *c9 = pc;
C *c10 = pc;
C *c11 = pc;
C *c12 = pc;
C *c13 = pc;
C *c14 = pc;
C *c15 = pc;
C *c16 = pc;
C *c17 = pc;
C *c18 = pc;
C *c19 = pc;
C *c20 = pc;
C *c21 = pc;
C *c22 = pc;
C *c23 = pc;
C *c24 = pc;
C *c25 = pc;
C *c26 = pc;
C *c27 = pc;
C *c28 = pc;
C *c29 = pc;
C *c30 = pc;
C *c31 = pc;
C *c32 = pc;
C *c33 = pc;
C *c34 = pc;
C *c35 = pc;
C *c36 = pc;
C *c37 = pc;
C *c38 = pc;
C *c39 = pc;
C *c40 = pc;
#elif TM == 3
C *c1 = (C *)pthread_getspecific(pc);
C *c2 = (C *)pthread_getspecific(pc);
C *c3 = (C *)pthread_getspecific(pc);
C *c4 = (C *)pthread_getspecific(pc);
C *c5 = (C *)pthread_getspecific(pc);
C *c6 = (C *)pthread_getspecific(pc);
C *c7 = (C *)pthread_getspecific(pc);
C *c8 = (C *)pthread_getspecific(pc);
C *c9 = (C *)pthread_getspecific(pc);
C *c10 = (C *)pthread_getspecific(pc);
C *c11 = (C *)pthread_getspecific(pc);
C *c12 = (C *)pthread_getspecific(pc);
C *c13 = (C *)pthread_getspecific(pc);
C *c14 = (C *)pthread_getspecific(pc);
C *c15 = (C *)pthread_getspecific(pc);
C *c16 = (C *)pthread_getspecific(pc);
C *c17 = (C *)pthread_getspecific(pc);
C *c18 = (C *)pthread_getspecific(pc);
C *c19 = (C *)pthread_getspecific(pc);
C *c20 = (C *)pthread_getspecific(pc);
C *c21 = (C *)pthread_getspecific(pc);
C *c22 = (C *)pthread_getspecific(pc);
C *c23 = (C *)pthread_getspecific(pc);
C *c24 = (C *)pthread_getspecific(pc);
C *c25 = (C *)pthread_getspecific(pc);
C *c26 = (C *)pthread_getspecific(pc);
C *c27 = (C *)pthread_getspecific(pc);
C *c28 = (C *)pthread_getspecific(pc);
C *c29 = (C *)pthread_getspecific(pc);
C *c30 = (C *)pthread_getspecific(pc);
C *c31 = (C *)pthread_getspecific(pc);
C *c32 = (C *)pthread_getspecific(pc);
C *c33 = (C *)pthread_getspecific(pc);
C *c34 = (C *)pthread_getspecific(pc);
C *c35 = (C *)pthread_getspecific(pc);
C *c36 = (C *)pthread_getspecific(pc);
C *c37 = (C *)pthread_getspecific(pc);
C *c38 = (C *)pthread_getspecific(pc);
C *c39 = (C *)pthread_getspecific(pc);
C *c40 = (C *)pthread_getspecific(pc);
#else
C *c1 = pc;
C *c2 = pc;
C *c3 = pc;
C *c4 = pc;
C *c5 = pc;
C *c6 = pc;
C *c7 = pc;
C *c8 = pc;
C *c9 = pc;
C *c10 = pc;
C *c11 = pc;
C *c12 = pc;
C *c13 = pc;
C *c14 = pc;
C *c15 = pc;
C *c16 = pc;
C *c17 = pc;
C *c18 = pc;
C *c19 = pc;
C *c20 = pc;
C *c21 = pc;
C *c22 = pc;
C *c23 = pc;
C *c24 = pc;
C *c25 = pc;
C *c26 = pc;
C *c27 = pc;
C *c28 = pc;
C *c29 = pc;
C *c30 = pc;
C *c31 = pc;
C *c32 = pc;
C *c33 = pc;
C *c34 = pc;
C *c35 = pc;
C *c36 = pc;
C *c37 = pc;
C *c38 = pc;
C *c39 = pc;
C *c40 = pc;
#endif
}
gettimeofday(&tm_end, NULL);
switches *= 40;
long long ns = (tm_end.tv_sec - tm_start.tv_sec) * 1000LL * 1000LL * 1000LL +
(tm_end.tv_usec - tm_start.tv_usec) * 1000LL;
std::cout 《《 “####Benchmark result#### ” 《《 testType 《《 std::endl;
std::cout 《《 “Totol switches : ” 《《 switches 《《 std::endl;
std::cout 《《 “Cost per switch(ns) : ” 《《 (double)ns/switches 《《 std::endl;
std::cout 《《 “All cost switch(ns) : ” 《《 ns 《《 std::endl;
std::cout 《《 “####Benchmark result####” 《《 std::endl;
}
// Program entry point: runs boostthreadFunc on a separate boost::thread, waits
// for it to finish, then prints a final message and exits with status 0.
int main() {
    // Presumably run on its own thread so that thread-local storage and its
    // thread-exit cleanup are exercised, rather than the main thread's.
    boost::thread bt(&boostthreadFunc);
    bt.join();
    // NOTE(review): the ideographic full stop in this message looks like a
    // transcription artifact - confirm the intended text.
    printf("main exit.。\n");
    return 0;
}
全部0条评论
快来发表一下你的评论吧 !