1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
#include <stdint.h>
#if defined(__i386__)
/*
 * Time one read-modify-write of memory[0] and memory[offset] using the low
 * 32 bits of the time stamp counter.
 *
 * Between the two rdtsc reads the function loads the byte at memory, XORs
 * the accumulator with the 32-bit word at memory + offset, and stores the
 * resulting low byte to both memory[0] and memory[offset].
 *
 * memory  - base address; must be readable and writable.
 * offset  - byte distance to the second location. The xor reads 4 bytes
 *           starting at memory + offset, so those bytes must be mapped.
 *
 * Returns the difference between the low 32 bits of the second and the
 * first rdtsc reading (wraps modulo 2^32).
 */
static uint32_t optmemmeasure(char* memory, uint32_t offset)
{
    uint32_t elapsed;   /* final eax: end_lo - start_lo */
    uint32_t start;     /* scratch register holding the first reading */

    /*
     * Outputs and clobbers are declared so the compiler knows which
     * registers the sequence overwrites; the scratch register is chosen by
     * the compiler instead of hard-coding the frame pointer. "memory" tells
     * the compiler that the bytes behind the pointer change.
     */
    __asm__ volatile(
        "rdtsc\n\t"
        "mov %%eax, %k[start]\n\t"
        "movb (%[mem]), %%al\n\t"
        "xor (%[mem],%[off]), %%eax\n\t"
        "movb %%al, (%[mem])\n\t"
        "movb %%al, (%[mem],%[off])\n\t"
        "rdtsc\n\t"
        "sub %k[start], %%eax\n\t"
        : "=&a" (elapsed), [start] "=&r" (start)
        /* offset is widened to pointer size so the index register always
           matches the width of the base register */
        : [mem] "r" (memory), [off] "r" ((uintptr_t)offset)
        : "edx", "cc", "memory"
    );
    (void)start;
    return elapsed;
}
// No accurate (full counter width) implementation was written for 32-bit
// x86, so memmeasure simply forwards to the optimized variant and hands
// back its result unchanged.
static uint32_t memmeasure(char* memory, uint32_t offset)
{
    const uint32_t elapsed = optmemmeasure(memory, offset);

    return elapsed;
}
/*
 * Read the time stamp counter.
 *
 * rdtsc delivers the counter as two 32-bit halves in edx (high) and eax
 * (low); they are combined into one 64-bit value here.
 *
 * The asm is volatile so that the compiler neither merges nor hoists
 * repeated reads: every call must execute its own rdtsc.
 */
static uint64_t rdtsc()
{
    uint32_t lo;
    uint32_t hi;

    __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
    return ((uint64_t)hi << 32) | lo;
}
#elif defined(__x86_64__)
/*
 * Time one read-modify-write of memory[0] and memory[offset] using the full
 * 64-bit time stamp counter. This variant uses all of the data provided by
 * rdtsc at the cost of a few more instructions than optmemmeasure.
 *
 * Between the two counter reads the function loads the byte at memory, XORs
 * the accumulator with the 64-bit word at memory + offset, and stores the
 * resulting low byte to both memory[0] and memory[offset].
 *
 * memory  - base address; must be readable and writable, otherwise the
 *           access faults.
 * offset  - byte distance to the second location. The xor reads 8 bytes
 *           starting at memory + offset, so those bytes must be mapped.
 *
 * Returns the second 64-bit counter reading minus the first.
 */
static uint64_t memmeasure(char* memory, uint64_t offset)
{
    uint64_t elapsed;   /* final rax: end - start */
    uint64_t start;     /* scratch register holding the first reading */

    /*
     * Outputs and clobbers are declared so the compiler knows that rax, rdx
     * and the scratch register are overwritten and that memory behind the
     * pointer changes; the result is returned explicitly instead of relying
     * on whatever happens to be left in rax.
     */
    __asm__ volatile(
        "rdtsc\n\t"
        "shl $32, %%rdx\n\t"
        "add %%rax, %%rdx\n\t"
        "mov %%rdx, %[start]\n\t"
        /* memory access under test; faults if the addresses are invalid */
        "movb (%[mem]), %%al\n\t"
        "xor (%[mem],%[off]), %%rax\n\t"
        "movb %%al, (%[mem])\n\t"
        "movb %%al, (%[mem],%[off])\n\t"
        "rdtsc\n\t"
        "shl $32, %%rdx\n\t"
        "add %%rdx, %%rax\n\t"
        "sub %[start], %%rax\n\t"
        : "=&a" (elapsed), [start] "=&r" (start)
        : [mem] "r" (memory), [off] "r" (offset)
        : "rdx", "cc", "memory"
    );
    (void)start;
    return elapsed;
}
/*
 * Time one read-modify-write of memory[0] and memory[offset] using only the
 * lower 32 bits returned by rdtsc, to save instructions. It is not
 * significantly faster than the accurate variant, but it has fewer
 * instructions and is therefore less likely to be delayed by the scheduler.
 *
 * Between the two counter reads the function loads the byte at memory, XORs
 * the accumulator with the 64-bit word at memory + offset, and stores the
 * resulting low byte to both memory[0] and memory[offset].
 *
 * memory  - base address; must be readable and writable, otherwise the
 *           access faults.
 * offset  - byte distance to the second location. The xor reads 8 bytes
 *           starting at memory + offset, so those bytes must be mapped.
 *
 * Returns the difference of the low 32 bits of the two counter readings,
 * zero-extended to 64 bits (wraps modulo 2^32).
 */
static uint64_t optmemmeasure(char* memory, uint64_t offset)
{
    uint64_t elapsed;    /* final rax; the 32-bit sub clears the upper half */
    uint32_t start_lo;   /* scratch register holding the first low word */

    /*
     * Outputs and clobbers are declared so the compiler knows that rax, rdx
     * and the scratch register are overwritten and that memory behind the
     * pointer changes; the result is returned explicitly instead of relying
     * on whatever happens to be left in rax.
     */
    __asm__ volatile(
        "rdtsc\n\t"
        "mov %%eax, %k[start]\n\t"
        /* memory access under test; faults if the addresses are invalid */
        "movb (%[mem]), %%al\n\t"
        "xor (%[mem],%[off]), %%rax\n\t"
        "movb %%al, (%[mem])\n\t"
        "movb %%al, (%[mem],%[off])\n\t"
        "rdtsc\n\t"
        "sub %k[start], %%eax\n\t"
        : "=&a" (elapsed), [start] "=&r" (start_lo)
        : [mem] "r" (memory), [off] "r" (offset)
        : "rdx", "cc", "memory"
    );
    (void)start_lo;
    return elapsed;
}
/*
 * Small helper that reads the time stamp counter.
 *
 * rdtsc delivers the counter in two halves: the low 32 bits in rax and the
 * high 32 bits in rdx. They are combined into one 64-bit value here.
 *
 * The registers are declared as asm outputs and the value is returned
 * explicitly, so the result does not depend on rax surviving until the
 * function returns. The asm is volatile so repeated calls are never merged.
 */
static uint64_t rdtsc()
{
    uint64_t lo;
    uint64_t hi;

    __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
    return (hi << 32) | lo;
}
#else
#error "This code only supports x86 and x86_64"
#endif
|