def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
0x8000
+00
+04
+08
+0c
+10
+14
+18
+1c
0x2000
0x2020
0x2040
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
+0c
+10
+14
+18
+1c
0x2000
0x2020
0x2040
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
int n2
+10
+14
+18
+1c
0x2000
0x2020
0x2040
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
int n2
+10
Cons(1,•)0x12a0
+14
int n1
+18
+1c
0x2000
0x2020
0x2040
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
int n2
+10
Cons(1,•)0x12a0
+14
int n1
+18
Cons(0,•)0x12a0
+1c
int n0
0x2000
0x2020
0x2040
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
int n2
+10
Cons(1,•)0x12a0
+14
int n1
+18
Cons(0,•)0x12a0
+1c
int n0
0x2000
0x2020
0x2040
None
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
int n2
+10
Cons(1,•)0x12a0
+14
int n1
+18
Cons(0,•)0x12a0
+1c
result0x2000
0x2000
0x2020
0x2040
None
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
int n2
+10
Cons(1,•)0x12a0
+14
int n1
+18
Cons(0,•)0x12a0
+1c
result0x2000
0x2000
0x2020
0x2040
None
Cons
0
0x2000
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
int n2
+10
Cons(1,•)0x12a0
+14
result0x2008
+18
+1c
0x2000
0x2020
0x2040
None
Cons
0
0x2000
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
int n2
+10
Cons(1,•)0x12a0
+14
result0x2008
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
0
0x2000
Cons
1
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
result0x2018
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
0
0x2000
Cons
1
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
int n3
+08
Cons(2,•)0x12a0
+0c
result0x2018
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst=•0x1128
+04
result0x2028
+08
+0c
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst0x2028
+04
+08
+0c
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def iotaR(n):
    if n < 1: return None
    else: return Cons(n - 1, iotaR(n - 1))
lst = iotaR(3)
0x8000
+00
lst0x2028
+04
+08
+0c
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total2
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2018
+0c
total2
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2018
+0c
total3
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2008
+0c
total3
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2008
+0c
total3
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2000
+0c
total3
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
def sum(lst):
    total = 0
    while lst != None:
        (total, lst) = (total + lst.hd, lst.tl)
    return total
Lies!
R0
R1
R2
R3
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
&total0x800c
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total0
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total0
&xs0x8008
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total0
xs0x2028
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total0
xs0x2028
&xs.hd0x202c
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total0
xs0x2028
xs.hd2
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total2
xs0x2028
xs.hd2
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total2
&xs.tl0x2030
xs.hd2
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total2
xs.tl0x2018
xs.hd2
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total2
xs0x2018
2
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total2
xs0x2018
&xs.hd0x201c
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total2
xs0x2018
xs.hd1
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs0x2018
xs.hd1
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
&xs.tl0x2020
xs.hd1
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs.tl0x2008
xs.hd1
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs0x2008
1
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs0x2008
&xs.hd0x200c
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs0x2008
xs.hd0
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs0x2008
xs.hd0
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
&xs.tl0x2010
xs.hd0
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs.tl0x2000
xs.hd0
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs0x2000
0
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total0
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
R0
R1
R2
R3
SP0x8010
total3
xs0x2000
0
the register file
0x8000
+00
lst0x2028
+04
RA
+08
lst0x2028
+0c
total3
+10
+14
+18
+1c
0x2000
0x2020
0x2040
None
0x2008
Cons
Cons
0
2
0x2000
0x2018
Cons
1
the stack
the heap
Arithmetic
Memory
No. of instructions
12
8
Cycles per operation
1
?
Total CPU cycles
12
800
Percent of time
1.5%
98.5%
The cycle time (period) is the reciprocal of the clock speed (frequency). For a 2.5 GHz CPU, for example, the cycle time is \(1 / (2.5 \times 10^9) = 0.4\mathop{\mathrm{ns}} = 0.0000000004\mathop{\mathrm{s}}.\)
Arithmetic
Memory
No. of instructions
12
8
Cycles per operation
1
?
Total CPU cycles
12
800
Percent of time
1.5%
98.5%
The cycle time (period) is the reciprocal of the clock speed (frequency). For a 2.5 GHz CPU, for example, the cycle time is \(1 / (2.5 \times 10^9) = 0.4\mathop{\mathrm{ns}} = 0.0000000004\mathop{\mathrm{s}}.\)
Arithmetic
Memory
No. of instructions
12
8
Cycles per operation
1
100
Total CPU cycles
12
800
Percent of time
1.5%
98.5%
The cycle time (period) is the reciprocal of the clock speed (frequency). For a 2.5 GHz CPU, for example, the cycle time is \(1 / (2.5 \times 10^9) = 0.4\mathop{\mathrm{ns}} = 0.0000000004\mathop{\mathrm{s}}.\)
Arithmetic
Memory
No. of instructions
12
8
Cycles per operation
× 1
× 100
Total CPU cycles
12
800
Percent of time
1.5%
98.5%
The cycle time (period) is the reciprocal of the clock speed (frequency). For a 2.5 GHz CPU, for example, the cycle time is \(1 / (2.5 \times 10^9) = 0.4\mathop{\mathrm{ns}} = 0.0000000004\mathop{\mathrm{s}}.\)
Arithmetic
Memory
No. of instructions
12
8
Cycles per operation
× 1
× 100
Total CPU cycles
12
800
Percent of time
1.5%
98.5%
The cycle time (period) is the reciprocal of the clock speed (frequency). For a 2.5 GHz CPU, for example, the cycle time is \(1 / (2.5 \times 10^9) = 0.4\mathop{\mathrm{ns}} = 0.0000000004\mathop{\mathrm{s}}.\)
Arithmetic (Registers)
Memory (DRAM)
No. of instructions
12
8
Cycles per operation
× 1
× 100
Total CPU cycles
12
800
Percent of time
1.5%
98.5%
The cycle time (period) is the reciprocal of the clock speed (frequency). For a 2.5 GHz CPU, for example, the cycle time is \(1 / (2.5 \times 10^9) = 0.4\mathop{\mathrm{ns}} = 0.0000000004\mathop{\mathrm{s}}.\)
The memory hierarchy
Cache
Registers
DRAM
Latency (cycles)
1
100
Latency (ns)
0.4
40
Price (US$/GiB)
priceless
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
Cache
DRAM
Latency (cycles)
1
10
100
Latency (ns)
0.4
4
40
Price (US$/GiB)
priceless
?
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
Cache
DRAM
Latency (cycles)
1
10
100
Latency (ns)
0.4
4
40
Price (US$/GiB)
priceless
?
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
Cache
DRAM
Latency (cycles)
1
10
100
Latency (ns)
0.4
4
40
Price (US$/GiB)
priceless
?
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
Cache
DRAM
Latency (cycles)
1
10
100
Latency (ns)
0.4
4
40
Price (US$/GiB)
priceless
?
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
L1
DRAM
Latency (cycles)
1
2
100
Latency (ns)
0.4
0.8
40
Price (US$/GiB)
priceless
n.f.s.
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
L1
L2
DRAM
Latency (cycles)
1
2
10
100
Latency (ns)
0.4
0.8
4
40
Price (US$/GiB)
priceless
n.f.s.
n.f.s.
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
L1
L2
L3
DRAM
Latency (cycles)
1
2
10
40
100
Latency (ns)
0.4
0.8
4
16
40
Price (US$/GiB)
priceless
n.f.s.
n.f.s.
$2500
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
L1
L2
L3
DRAM
Latency (cycles)
1
2
10
40
100
Latency (ns)
0.4
0.8
4
16
40
Price (US$/GiB)
priceless
n.f.s.
n.f.s.
$2500
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
L1
L2
L3
DRAM
Latency (cycles)
1
2
10
40
100
Latency (ns)
0.4
0.8
4
16
40
Price (US$/GiB)
priceless
n.f.s.
n.f.s.
$2500
$50
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
The memory hierarchy
Cache
Registers
L1
L2
L3
DRAM
Disk
Latency (cycles)
1
2
10
40
100
2500
Latency (ns)
0.4
0.8
4
16
40
1000
Price (US$/GiB)
priceless
n.f.s.
n.f.s.
$2500
$50
$0.10
Typical size
128 B
128 KiB
1 MiB
6 MiB
16 GiB
1 TiB
\(2^k\) bytes
\(7\)
\(17\)
\(20\)
\(22.6\)
\(34\)
40
Average length of load or store (in clock cycles)
Without cache:
100
With cache (assuming 90% hit rate):
\(.9 × 10 + .1 × 100 = 19\)
How to win big
Don’t take cache misses.
How to win big
Don’t take cache misses.
Okay, but how?
Locality!
Memory locality
Time locality:
You are likely to access things you’ve accessed recently.
Space locality:
You are likely to access things near things you’ve accessed recently.
Caching takes advantage of both:
Once an item is in cache, it’s ready to be accessed again.
Each cache line holds a block of words, which means when an item is in cache, so are its closest neighbors.
Memory locality
Time locality:
You are likely to access things you’ve accessed recently.
Space locality:
You are likely to access things near things you’ve accessed recently.
Caching takes advantage of both:
Once an item is in cache, it’s ready to be accessed again.
Each cache line holds a block of words, which means when an item is in cache, so are its closest neighbors.