int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
CPU
RAM
CPU
RAM
CPU
RAM
L1
L2
L3
{
CPU
RAM
L1
L2
L3
x = 5;
👀
CPU
RAM
L1
L2
L3
x = 5;
👀
CPU
RAM
L1
L2
L3
x = 5;
👀
CPU
RAM
L1
L2
L3
x = 5;
👀
RAM
L1
L2
L3
x = 5;
&x == 0x7ffe19e65d24
0x7f0d09490260
logical address
physical address
Translation lookaside buffer
👀
Intel Core i7 920 Bloomfield
(2008)
Intel Core i7 920 Bloomfield
(2008)
QUEUE
CORE
CORE
CORE
CORE
MEMORY CONTROLLER
QPI 0
QPI 1
MISC IO
MISC IO
Intel Core i7 920 Bloomfield
(2008)
QuickPath Interconnect (QPI)
QUEUE
CORE
CORE
CORE
CORE
MEMORY CONTROLLER
L2
L2
L2
L2
L1
L1
L1
L1
SHARED
L3 CACHE
QPI 0
QPI 1
MISC IO
MISC IO
Intel Core i7 920 Bloomfield
(2008)
[word here]
👀
👀
cache
hit
cache
miss
{
cache size [bytes]
L1 cache
L1_D
L1_I
instruction cache
movl -16(%rbp), %eax
cmpl %eax, -4(%rbp)
jge .L9
leaq -24(%rbp), %rax
movq %rax, %rsi
movl $_ZSt3cin, %edi
call _ZNSirsERi
movl -24(%rbp), %eax
subl $1, %eax
movl %eax, -24(%rbp)
movl -24(%rbp), %eax
movslq %eax, %rdx
movl tab(,%rdx,4), %edx
addl $1, %edx
cltq
movl %edx, tab(,%rax,4)
addl $1, -4(%rbp)
jmp .L10
read x;
👀
Translation lookaside buffer
read x;
👀
Translation lookaside buffer
read x;
👀
Translation lookaside buffer
read x;
👀
Translation lookaside buffer
read x;
👀
Translation lookaside buffer
read x;
👀
Translation lookaside buffer
}
cache line
{
cache entry
cache entry tag
cache replacement policy
cache write policy
cache placement policy
N-way
associative
cache line
cache entry
cache entry
cache entry
cache entry
1 - way associative = direct-mapped cache
∞ - way associative = fully associative cache
( )
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
x cache misses
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
x * y cache misses
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
cache misses
cache misses
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int i = 0; i < x; ++i)
{
for (int j = 0; j < y; ++j)
{
n += ar[i][j];
}
}
// n == 0
int x = 8192, y = 16;
static int ar[x][y];
int n = 0;
for (int j = 0; j < y; ++j)
{
for (int i = 0; i < x; ++i)
{
n += ar[i][j];
}
}
// n == 0
cache misses
cache misses
Locality of reference
Spatial (data locality)
Temporal
std::array
std::vector
branch friendly code
data oriented design
std::list
std::map
std::set std::unordered_map std::unordered_set
std::binary_search
std::valarray
virtual
function pointers
pointers
false sharing
contiguous memory access
random memory access
(hyperthreading)
std::vector<int> v = CreateVec(100000);
// v == {0, 1, 2, ...}
int n = 0;
for (int i = 0; i < v.size(); ++i)
{
if (v[i] < 50000)
{
++n;
}
}
// n == 50000
std::vector<int> v = CreateVec(100000);
ShuffleVec(v);
int n = 0;
for (int i = 0; i < v.size(); ++i)
{
if (v[i] < 50000)
{
++n;
}
}
// n == 50000
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
true
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
true
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
branch prediction
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
true
correct branch prediction
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
branch prediction
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
???
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
false
branch misprediction
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
false
pipeline flush
if (cond)
cond
!cond
bool cond = input();
if (cond)
{
return 13;
}
else
{
return 17;
}
bool cond = input();
return 13;
return 17;
cond =
false
std::vector<int> v = CreateVec(100000);
// v == {0, 1, 2, ...}
int n = 0;
for (int i = 0; i < v.size(); ++i)
{
if (v[i] < 50000)
{
++n;
}
}
// n == 50000
std::vector<int> v = CreateVec(100000);
ShuffleVec(v);
int n = 0;
for (int i = 0; i < v.size(); ++i)
{
if (v[i] < 50000)
{
++n;
}
}
// n == 50000