CSAPP–Architecture Lab实验记录
实验准备
实验资料
- 《深入理解计算机系统》第四章 处理器体系结构
- lab官网
环境搭建
博主用的是 linux (ubuntu20.04)
1、首先下载实验资料
$ wget http://csapp.cs.cmu.edu/im/labs/archlab.tar
2、解压
$ tar xvf archlab-handout.tar
$ cd archlab-handout
$ tar xvf sim.tar
因为本套实验依赖tcl/tk
、flex
、bison
,我们需要安装这几个软件:
$ sudo apt install tcl tcl-dev tk tk-dev
$ sudo apt-get install bison flex
因为 Makefile 里写的 tcl 的版本已经比较老了,我们需要修改一下 Makefile:
$ sed -i "s/tcl8.5/tcl8.6/g" Makefile
$ sed -i "s/CFLAGS=/CFLAGS=-DUSE_INTERP_RESULT /g" Makefile
3、执行编译:
$ cd sim
$ make clean; make
实验内容
Part A
这个部分在 sim/misc 这个文档夹里完成。你的任务就是写 3 个 Y86-64 进程并且模拟它。这 3 个进程要实现的功能在 sim/misc/examples.c 里面。
examples.c
内容:
/*
* Architecture Lab: Part A
*
* High level specs for the functions that the students will rewrite
* in Y86-64 assembly language
*/
/* $begin examples */
/* linked list element */
typedef struct ELE {
long val;
struct ELE *next;
} *list_ptr;
/* sum_list - Sum the elements of a linked list */
long sum_list(list_ptr ls)
{
long val = 0;
while (ls) {
val += ls->val;
ls = ls->next;
}
return val;
}
/* rsum_list - Recursive version of sum_list */
long rsum_list(list_ptr ls)
{
if (!ls)
return 0;
else {
long val = ls->val;
long rest = rsum_list(ls->next);
return val + rest;
}
}
/* copy_block - Copy src to dest and return xor checksum of src */
long copy_block(long *src, long *dest, long len)
{
long result = 0;
while (len > 0) {
long val = *src++;
*dest++ = val;
result ^= val;
len--;
}
return result;
}
/* $end examples */
使用 YAS 将相应的进程转换成二进制,然后再把生成的二进制放到命令集模拟器 YIS 上运行。
下面是三组链表数据:
# Sample linked list
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0
sum.ys: 迭代计算链表元素和
Write a Y86-64 program sum.ys that iteratively sums the elements of a linked list. Your program should consist of some code that sets up the stack structure, invokes a function, and then halts. In this case, the function should be Y86-64 code for a function (sum list) that is functionally equivalent to the C sum list function in Figure 1
# sum_list.ys by linxc
# Execution begins at address 0
.pos 0
irmovq stack,%rsp
call main
halt
# Sample linked list
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0
main:
irmovq ele1,%rdi
call sum_list
ret
sum_list:
xorq %rax,%rax
jmp test
loop:
mrmovq (%rdi),%rsi
addq %rsi,%rax #求和
mrmovq 8(%rdi), %rdi # 将指针(下一个 struct 的地址)放进 %rdi
test:
andq %rdi,%rdi
jne loop
ret
stack:
接下来用 YAS 和 YIS 进行汇编并模拟运行,结果:
$ ./yas sum.ys
$ ./yis sum.yo
Stopped in 26 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000cba
%rsp: 0x0000000000000000 0x0000000000000100
%r9: 0x0000000000000000 0x0000000000000c00
Changes to memory:
0x00f0: 0x0000000000000000 0x000000000000005b
0x00f8: 0x0000000000000000 0x0000000000000013
可以看到 %rax 的值就是标号 ele1,ele2,ele3 处三个元素的和 0xcba,并且可以看到部分寄存器和部分内存地址的值也发生了改变。
rsum.ys: 递归计算链表元素和
写一个类似的 Y86-64 进程 rsum.ys 递归的计算链表的和,链表元素与上面一样。
# rsum_list.ys by linxc
# Execution begins at address 0
.pos 0
irmovq stack,%rsp
call main
halt
# Sample linked list
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0
main:
irmovq ele1,%rdi
call rsum_list
ret
rsum_list:
pushq %r12
xorq %rax,%rax
andq %rdi,%rdi
je re
mrmovq (%rdi),%r12
mrmovq 8(%rdi), %rdi# 将指针(下一个 struct 的地址)放进 %rdi
call rsum_list
addq %r12,%rax
re: popq %r12
ret
.pos 0x100
stack:
结果:
$ ./yas rsum.ys
$ ./yis rsum.yo
Stopped in 42 steps at PC = 0x13. Status 'HLT', CC Z=0 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000cba
%rsp: 0x0000000000000000 0x0000000000000100
Changes to memory:
0x00b8: 0x0000000000000000 0x0000000000000c00
0x00c0: 0x0000000000000000 0x0000000000000088
0x00c8: 0x0000000000000000 0x00000000000000b0
0x00d0: 0x0000000000000000 0x0000000000000088
0x00d8: 0x0000000000000000 0x000000000000000a
0x00e0: 0x0000000000000000 0x0000000000000088
0x00f0: 0x0000000000000000 0x000000000000005b
0x00f8: 0x0000000000000000 0x0000000000000013
copy.ys: 复制函数
将内存中的一块数据拷贝到另一个不重叠的内存位置,并计算被拷贝数据的 checksum(Xor)。C 语言代码如下:
# copy.ys by linxc
# Execution begins at address 0
.pos 0
irmovq stack,%rsp
call main
halt
.align 8
# Source block
src:
.quad 0x00a
.quad 0x0b0
.quad 0xc00
# Destination block
dest:
.quad 0x111
.quad 0x222
.quad 0x333
main:
irmovq src,%rdi
irmovq dest,%rsi
irmovq $3,%rdx
call copy_block
ret
copy_block:
pushq %r8
pushq %r9
pushq %r12
irmovq $1,%r9
irmovq $8,%r12
xorq %rax,%rax
jmp re
loop:
mrmovq 0(%rdi),%r8
addq %r12,%rdi
rmmovq %r8,(%rsi)
addq %r12,%rsi
xorq %r8,%rax #求和
subq %r9,%rdx
re:
andq %rdx, %rdx # len > 0?
jne loop
popq %r12
popq %r9
popq %r8
ret
.pos 0x100
stack:
运行结果如下:
$ ./yas copy.ys
$ ./yis copy.yo
Stopped in 45 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000cba
%rsp: 0x0000000000000000 0x0000000000000100
%rsi: 0x0000000000000000 0x0000000000000048
%rdi: 0x0000000000000000 0x0000000000000030
Changes to memory:
0x0030: 0x0000000000000111 0x000000000000000a
0x0038: 0x0000000000000222 0x00000000000000b0
0x0040: 0x0000000000000333 0x0000000000000c00
0x00f0: 0x0000000000000000 0x000000000000006f
0x00f8: 0x0000000000000000 0x0000000000000013
Part B
这部分在目录 sim/seq 里完成。要求为处理器增加一个iaddq
指令。通过修改 seq-full.hcl
文档,使其支持 iaddq
命令。
按照提示,我们可以看书本p266,图4-18 irmovq
指令计算顺序,模拟iaddq
指令顺序:
- 取指:icode:ifun<—M1[PC] rA: rB <–M1[PC+1] valC <–M8[PC+2] valP <—PC+10
- 译码:valB <—R[rb]
- 执行:valE<–valC+valB
- 访存:无
- 写回:R[rb]<–valE
- 更新PC:PC<–valP
按照上面修改seq-full.hcl
文档内容如下:
#/* $begin seq-all-hcl */
####################################################################
# HCL Description of Control for Single Cycle Y86-64 Processor SEQ #
# Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2010 #
####################################################################
## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work
####################################################################
# C Include's. Don't alter these #
####################################################################
quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'word_t gen_pc(){return 0;}'
quote 'int main(int argc, char *argv[])'
quote ' {plusmode=0;return sim_main(argc,argv);}'
####################################################################
# Declarations. Do not change/remove/delete any of these #
####################################################################
##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP 'I_NOP'
wordsig IHALT 'I_HALT'
wordsig IRRMOVQ 'I_RRMOVQ'
wordsig IIRMOVQ 'I_IRMOVQ'
wordsig IRMMOVQ 'I_RMMOVQ'
wordsig IMRMOVQ 'I_MRMOVQ'
wordsig IOPQ 'I_ALU'
wordsig IJXX 'I_JMP'
wordsig ICALL 'I_CALL'
wordsig IRET 'I_RET'
wordsig IPUSHQ 'I_PUSHQ'
wordsig IPOPQ 'I_POPQ'
# Instruction code for iaddq instruction
wordsig IIADDQ 'I_IADDQ'
##### Symbolic represenations of Y86-64 function codes #####
wordsig FNONE 'F_NONE' # Default function code
##### Symbolic representation of Y86-64 Registers referenced explicitly #####
wordsig RRSP 'REG_RSP' # Stack Pointer
wordsig RNONE 'REG_NONE' # Special value indicating "no register"
##### ALU Functions referenced explicitly #####
wordsig ALUADD 'A_ADD' # ALU should add its arguments
##### Possible instruction status values #####
wordsig SAOK 'STAT_AOK' # Normal execution
wordsig SADR 'STAT_ADR' # Invalid memory address
wordsig SINS 'STAT_INS' # Invalid instruction
wordsig SHLT 'STAT_HLT' # Halt instruction encountered
##### Signals that can be referenced by control logic ####################
##### Fetch stage inputs #####
wordsig pc 'pc' # Program counter
##### Fetch stage computations #####
wordsig imem_icode 'imem_icode' # icode field from instruction memory
wordsig imem_ifun 'imem_ifun' # ifun field from instruction memory
wordsig icode 'icode' # Instruction control code
wordsig ifun 'ifun' # Instruction function
wordsig rA 'ra' # rA field from instruction
wordsig rB 'rb' # rB field from instruction
wordsig valC 'valc' # Constant from instruction
wordsig valP 'valp' # Address of following instruction
boolsig imem_error 'imem_error' # Error signal from instruction memory
boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
##### Decode stage computations #####
wordsig valA 'vala' # Value from register A port
wordsig valB 'valb' # Value from register B port
##### Execute stage computations #####
wordsig valE 'vale' # Value computed by ALU
boolsig Cnd 'cond' # Branch test
##### Memory stage computations #####
wordsig valM 'valm' # Value read from memory
boolsig dmem_error 'dmem_error' # Error signal from data memory
####################################################################
# Control Signal Definitions. #
####################################################################
################ Fetch Stage ###################################
# Determine instruction code
word icode = [
imem_error: INOP;
1: imem_icode; # Default: get from instruction memory
];
# Determine instruction function
word ifun = [
imem_error: FNONE;
1: imem_ifun; # Default: get from instruction memory
];
bool instr_valid = icode in
{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };
# Does fetched instruction require a regid byte?
bool need_regids =
icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ,
IIRMOVQ, IRMMOVQ, IMRMOVQ,IIADDQ };
# Does fetched instruction require a constant word?
bool need_valC =
icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL,IIADDQ };
################ Decode Stage ###################################
## What register should be used as the A source?
word srcA = [
icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ } : rA;
icode in { IPOPQ, IRET } : RRSP;
1 : RNONE; # Don't need register
];
## What register should be used as the B source?
word srcB = [
icode in { IOPQ, IRMMOVQ, IMRMOVQ,IIADDQ } : rB;
icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't need register
];
## What register should be used as the E destination?
word dstE = [
icode in { IRRMOVQ } && Cnd : rB;
icode in { IIRMOVQ, IOPQ,IIADDQ } : rB;
icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't write any register
];
## What register should be used as the M destination?
word dstM = [
icode in { IMRMOVQ, IPOPQ } : rA;
1 : RNONE; # Don't write any register
];
################ Execute Stage ###################################
## Select input A to ALU
word aluA = [
icode in { IRRMOVQ, IOPQ } : valA;
icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ,IIADDQ } : valC;
icode in { ICALL, IPUSHQ } : -8;
icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU
];
## Select input B to ALU
word aluB = [
icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
IPUSHQ, IRET, IPOPQ,IIADDQ } : valB;
icode in { IRRMOVQ, IIRMOVQ } : 0;
# Other instructions don't need ALU
];
## Set the ALU function
word alufun = [
icode == IOPQ : ifun;
1 : ALUADD;
];
## Should the condition codes be updated?
bool set_cc = icode in { IOPQ };
################ Memory Stage ###################################
## Set read control signal
bool mem_read = icode in { IMRMOVQ, IPOPQ, IRET };
## Set write control signal
bool mem_write = icode in { IRMMOVQ, IPUSHQ, ICALL };
## Select memory address
word mem_addr = [
icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : valE;
icode in { IPOPQ, IRET } : valA;
# Other instructions don't need address
];
## Select memory input data
word mem_data = [
# Value from register
icode in { IRMMOVQ, IPUSHQ } : valA;
# Return PC
icode == ICALL : valP;
# Default: Don't write anything
];
## Determine instruction status
word Stat = [
imem_error || dmem_error : SADR;
!instr_valid: SINS;
icode == IHALT : SHLT;
1 : SAOK;
];
################ Program Counter Update ############################
## What address should instruction be fetched at
word new_pc = [
# Call. Use instruction constant
icode == ICALL : valC;
# Taken branch. Use instruction constant
icode == IJXX && Cnd : valC;
# Completion of RET instruction. Use value from stack
icode == IRET : valM;
# Default: Use incremented PC
1 : valP;
];
#/* $end seq-all-hcl */
接下来,检验一下修改对不对:
$ make VERSION=full
# Building the seq-full.hcl version of SEQ
../misc/hcl2c -n seq-full.hcl <seq-full.hcl >seq-full.c
gcc -Wall -O2 -isystem /usr/include/tcl8.5 -I../misc -DHAS_GUI -o ssim \
seq-full.c ssim.c ../misc/isa.c -L/usr/lib -ltk -ltcl -lm
ssim.c:20:10: fatal error: tk.h: No such file or directory
20 | #include <tk.h>
| ^~~~~~
compilation terminated.
make: *** [Makefile:44: ssim] Error 1
......
$ make VERSION=full
/usr/bin/ld: /tmp/ccbOoipJ.o:(.data.rel+0x0): undefined reference to `matherr'
collect2: error: ld returned 1 exit status
make: *** [Makefile:44: ssim] Error 1
我在执行make VERSION=full
命令的时候,报了上面的两个错误,对于第一个错误,这个刚开始一样只需执行下面两条命令:
$ sed -i "s/tcl8.5/tcl8.6/g" Makefile
$ sed -i "s/CFLAGS=/CFLAGS=-DUSE_INTERP_RESULT /g" Makefile
对于第二个错误,undefined reference to matherr
。我们可以在ssim.c
找到matherr
,然后注释掉那两行,因为没有用到。
修改完后,重新执行:
$ make VERSION=full
$ (cd ../ptest; make SIM=../seq/ssim TFLAGS=-i)
./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
Test op-iaddq-256-rdx failed
Test op-iaddq-4-rdx failed
Test op-iaddq-256-rbx failed
Test op-iaddq-4-rbx failed
Test op-iaddq-256-rsp failed
Test op-iaddq-4-rsp failed
6/58 ISA Checks Failed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
Test ji-jle-64-32 failed
Test ji-jl-32-64 failed
Test ji-je-32-64 failed
Test ji-je-64-32 failed
Test ji-jne-32-64 failed
Test ji-jne-64-32 failed
Test ji-jge-32-64 failed
Test ji-jg-64-32 failed
8/96 ISA Checks Failed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 756 ISA Checks Succeed
当看到All 756 ISA Checks Succeed
就是成功了。
Part C
这里我们要做的就是修改 sim/pipe/pipe-full.hcl
以及 sim/pipe/ncopy.ys
的内容。使我们的程序运行效率尽量高。在为 pipe-full.hcl
实现完 iaddq
之后。我们就可以分别使用如下指令测试我们的代码:
$ ./correctness.pl #结果是否正确
$ ./benchmark.pl #得出效率,分数越高结果越好
修改之后的pipe-full.hcl
文件:
#/* $begin pipe-all-hcl */
####################################################################
# HCL Description of Control for Pipelined Y86-64 Processor #
# Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2014 #
####################################################################
## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work
####################################################################
# C Include's. Don't alter these #
####################################################################
quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "pipeline.h"'
quote '#include "stages.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'int main(int argc, char *argv[]){return sim_main(argc,argv);}'
####################################################################
# Declarations. Do not change/remove/delete any of these #
####################################################################
##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP 'I_NOP'
wordsig IHALT 'I_HALT'
wordsig IRRMOVQ 'I_RRMOVQ'
wordsig IIRMOVQ 'I_IRMOVQ'
wordsig IRMMOVQ 'I_RMMOVQ'
wordsig IMRMOVQ 'I_MRMOVQ'
wordsig IOPQ 'I_ALU'
wordsig IJXX 'I_JMP'
wordsig ICALL 'I_CALL'
wordsig IRET 'I_RET'
wordsig IPUSHQ 'I_PUSHQ'
wordsig IPOPQ 'I_POPQ'
# Instruction code for iaddq instruction
wordsig IIADDQ 'I_IADDQ'
##### Symbolic represenations of Y86-64 function codes #####
wordsig FNONE 'F_NONE' # Default function code
##### Symbolic representation of Y86-64 Registers referenced #####
wordsig RRSP 'REG_RSP' # Stack Pointer
wordsig RNONE 'REG_NONE' # Special value indicating "no register"
##### ALU Functions referenced explicitly ##########################
wordsig ALUADD 'A_ADD' # ALU should add its arguments
##### Possible instruction status values #####
wordsig SBUB 'STAT_BUB' # Bubble in stage
wordsig SAOK 'STAT_AOK' # Normal execution
wordsig SADR 'STAT_ADR' # Invalid memory address
wordsig SINS 'STAT_INS' # Invalid instruction
wordsig SHLT 'STAT_HLT' # Halt instruction encountered
##### Signals that can be referenced by control logic ##############
##### Pipeline Register F ##########################################
wordsig F_predPC 'pc_curr->pc' # Predicted value of PC
##### Intermediate Values in Fetch Stage ###########################
wordsig imem_icode 'imem_icode' # icode field from instruction memory
wordsig imem_ifun 'imem_ifun' # ifun field from instruction memory
wordsig f_icode 'if_id_next->icode' # (Possibly modified) instruction code
wordsig f_ifun 'if_id_next->ifun' # Fetched instruction function
wordsig f_valC 'if_id_next->valc' # Constant data of fetched instruction
wordsig f_valP 'if_id_next->valp' # Address of following instruction
boolsig imem_error 'imem_error' # Error signal from instruction memory
boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
##### Pipeline Register D ##########################################
wordsig D_icode 'if_id_curr->icode' # Instruction code
wordsig D_rA 'if_id_curr->ra' # rA field from instruction
wordsig D_rB 'if_id_curr->rb' # rB field from instruction
wordsig D_valP 'if_id_curr->valp' # Incremented PC
##### Intermediate Values in Decode Stage #########################
wordsig d_srcA 'id_ex_next->srca' # srcA from decoded instruction
wordsig d_srcB 'id_ex_next->srcb' # srcB from decoded instruction
wordsig d_rvalA 'd_regvala' # valA read from register file
wordsig d_rvalB 'd_regvalb' # valB read from register file
##### Pipeline Register E ##########################################
wordsig E_icode 'id_ex_curr->icode' # Instruction code
wordsig E_ifun 'id_ex_curr->ifun' # Instruction function
wordsig E_valC 'id_ex_curr->valc' # Constant data
wordsig E_srcA 'id_ex_curr->srca' # Source A register ID
wordsig E_valA 'id_ex_curr->vala' # Source A value
wordsig E_srcB 'id_ex_curr->srcb' # Source B register ID
wordsig E_valB 'id_ex_curr->valb' # Source B value
wordsig E_dstE 'id_ex_curr->deste' # Destination E register ID
wordsig E_dstM 'id_ex_curr->destm' # Destination M register ID
##### Intermediate Values in Execute Stage #########################
wordsig e_valE 'ex_mem_next->vale' # valE generated by ALU
boolsig e_Cnd 'ex_mem_next->takebranch' # Does condition hold?
wordsig e_dstE 'ex_mem_next->deste' # dstE (possibly modified to be RNONE)
##### Pipeline Register M #########################
wordsig M_stat 'ex_mem_curr->status' # Instruction status
wordsig M_icode 'ex_mem_curr->icode' # Instruction code
wordsig M_ifun 'ex_mem_curr->ifun' # Instruction function
wordsig M_valA 'ex_mem_curr->vala' # Source A value
wordsig M_dstE 'ex_mem_curr->deste' # Destination E register ID
wordsig M_valE 'ex_mem_curr->vale' # ALU E value
wordsig M_dstM 'ex_mem_curr->destm' # Destination M register ID
boolsig M_Cnd 'ex_mem_curr->takebranch' # Condition flag
boolsig dmem_error 'dmem_error' # Error signal from instruction memory
##### Intermediate Values in Memory Stage ##########################
wordsig m_valM 'mem_wb_next->valm' # valM generated by memory
wordsig m_stat 'mem_wb_next->status' # stat (possibly modified to be SADR)
##### Pipeline Register W ##########################################
wordsig W_stat 'mem_wb_curr->status' # Instruction status
wordsig W_icode 'mem_wb_curr->icode' # Instruction code
wordsig W_dstE 'mem_wb_curr->deste' # Destination E register ID
wordsig W_valE 'mem_wb_curr->vale' # ALU E value
wordsig W_dstM 'mem_wb_curr->destm' # Destination M register ID
wordsig W_valM 'mem_wb_curr->valm' # Memory M value
####################################################################
# Control Signal Definitions. #
####################################################################
################ Fetch Stage ###################################
## What address should instruction be fetched at
word f_pc = [
# Mispredicted branch. Fetch at incremented PC
M_icode == IJXX && !M_Cnd : M_valA;
# Completion of RET instruction
W_icode == IRET : W_valM;
# Default: Use predicted value of PC
1 : F_predPC;
];
## Determine icode of fetched instruction
word f_icode = [
imem_error : INOP;
1: imem_icode;
];
# Determine ifun
word f_ifun = [
imem_error : FNONE;
1: imem_ifun;
];
# Is instruction valid?
bool instr_valid = f_icode in
{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ ,IIADDQ };
# Determine status code for fetched instruction
word f_stat = [
imem_error: SADR;
!instr_valid : SINS;
f_icode == IHALT : SHLT;
1 : SAOK;
];
# Does fetched instruction require a regid byte?
bool need_regids =
f_icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ,
IIRMOVQ, IRMMOVQ, IMRMOVQ,IIADDQ };
# Does fetched instruction require a constant word?
bool need_valC =
f_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL,IIADDQ };
# Predict next value of PC
word f_predPC = [
f_icode in { IJXX, ICALL } : f_valC;
1 : f_valP;
];
################ Decode Stage ######################################
## What register should be used as the A source?
word d_srcA = [
D_icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ } : D_rA;
D_icode in { IPOPQ, IRET } : RRSP;
1 : RNONE; # Don't need register
];
## What register should be used as the B source?
word d_srcB = [
D_icode in { IOPQ, IRMMOVQ, IMRMOVQ } : D_rB;
D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't need register
];
## What register should be used as the E destination?
word d_dstE = [
D_icode in { IRRMOVQ, IIRMOVQ, IOPQ} : D_rB;
D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't write any register
];
## What register should be used as the M destination?
word d_dstM = [
D_icode in { IMRMOVQ, IPOPQ } : D_rA;
1 : RNONE; # Don't write any register
];
## What should be the A value?
## Forward into decode stage for valA
word d_valA = [
D_icode in { ICALL, IJXX } : D_valP; # Use incremented PC
d_srcA == e_dstE : e_valE; # Forward valE from execute
d_srcA == M_dstM : m_valM; # Forward valM from memory
d_srcA == M_dstE : M_valE; # Forward valE from memory
d_srcA == W_dstM : W_valM; # Forward valM from write back
d_srcA == W_dstE : W_valE; # Forward valE from write back
1 : d_rvalA; # Use value read from register file
];
word d_valB = [
d_srcB == e_dstE : e_valE; # Forward valE from execute
d_srcB == M_dstM : m_valM; # Forward valM from memory
d_srcB == M_dstE : M_valE; # Forward valE from memory
d_srcB == W_dstM : W_valM; # Forward valM from write back
d_srcB == W_dstE : W_valE; # Forward valE from write back
1 : d_rvalB; # Use value read from register file
];
################ Execute Stage #####################################
## Select input A to ALU
word aluA = [
E_icode in { IRRMOVQ, IOPQ } : E_valA;
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC;
E_icode in { ICALL, IPUSHQ } : -8;
E_icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU
];
## Select input B to ALU
word aluB = [
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
E_icode in { IRRMOVQ, IIRMOVQ } : 0;
# Other instructions don't need ALU
];
## Set the ALU function
word alufun = [
E_icode == IOPQ : E_ifun;
1 : ALUADD;
];
## Should the condition codes be updated?
bool set_cc = E_icode == IOPQ &&
# State changes only during normal operation
!m_stat in { SADR, SINS, SHLT } && !W_stat in { SADR, SINS, SHLT };
## Generate valA in execute stage
word e_valA = E_valA; # Pass valA through stage
## Set dstE to RNONE in event of not-taken conditional move
word e_dstE = [
E_icode == IRRMOVQ && !e_Cnd : RNONE;
1 : E_dstE;
];
################ Memory Stage ######################################
## Select memory address
word mem_addr = [
M_icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : M_valE;
M_icode in { IPOPQ, IRET } : M_valA;
# Other instructions don't need address
];
## Set read control signal
bool mem_read = M_icode in { IMRMOVQ, IPOPQ, IRET };
## Set write control signal
bool mem_write = M_icode in { IRMMOVQ, IPUSHQ, ICALL };
#/* $begin pipe-m_stat-hcl */
## Update the status
word m_stat = [
dmem_error : SADR;
1 : M_stat;
];
#/* $end pipe-m_stat-hcl */
## Set E port register ID
word w_dstE = W_dstE;
## Set E port value
word w_valE = W_valE;
## Set M port register ID
word w_dstM = W_dstM;
## Set M port value
word w_valM = W_valM;
## Update processor status
word Stat = [
W_stat == SBUB : SAOK;
1 : W_stat;
];
################ Pipeline Register Control #########################
# Should I stall or inject a bubble into Pipeline Register F?
# At most one of these can be true.
bool F_bubble = 0;
bool F_stall =
# Conditions for a load/use hazard
E_icode in { IMRMOVQ, IPOPQ } &&
E_dstM in { d_srcA, d_srcB } ||
# Stalling at fetch while ret passes through pipeline
IRET in { D_icode, E_icode, M_icode };
# Should I stall or inject a bubble into Pipeline Register D?
# At most one of these can be true.
bool D_stall =
# Conditions for a load/use hazard
E_icode in { IMRMOVQ, IPOPQ } &&
E_dstM in { d_srcA, d_srcB };
bool D_bubble =
# Mispredicted branch
(E_icode == IJXX && !e_Cnd) ||
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
!(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
IRET in { D_icode, E_icode, M_icode };
# Should I stall or inject a bubble into Pipeline Register E?
# At most one of these can be true.
bool E_stall = 0;
bool E_bubble =
# Mispredicted branch
(E_icode == IJXX && !e_Cnd) ||
# Conditions for a load/use hazard
E_icode in { IMRMOVQ, IPOPQ } &&
E_dstM in { d_srcA, d_srcB};
# Should I stall or inject a bubble into Pipeline Register M?
# At most one of these can be true.
bool M_stall = 0;
# Start injecting bubbles as soon as exception passes through memory stage
bool M_bubble = m_stat in { SADR, SINS, SHLT } || W_stat in { SADR, SINS, SHLT };
# Should I stall or inject a bubble into Pipeline Register W?
bool W_stall = W_stat in { SADR, SINS, SHLT };
bool W_bubble = 0;
#/* $end pipe-all-hcl */
测试一下正确性:
$ ./correctness.pl
Simulating with instruction set simulator yis
ncopy
0 OK
1 OK
2 OK
3 OK
4 OK
5 OK
6 OK
7 OK
8 OK
9 OK
10 OK
11 OK
12 OK
13 OK
14 OK
15 OK
16 OK
17 OK
18 OK
19 OK
20 OK
21 OK
22 OK
23 OK
24 OK
25 OK
26 OK
27 OK
28 OK
29 OK
30 OK
31 OK
32 OK
33 OK
34 OK
35 OK
36 OK
37 OK
38 OK
39 OK
40 OK
41 OK
42 OK
43 OK
44 OK
45 OK
46 OK
47 OK
48 OK
49 OK
50 OK
51 OK
52 OK
53 OK
54 OK
55 OK
56 OK
57 OK
58 OK
59 OK
60 OK
61 OK
62 OK
63 OK
64 OK
128 OK
192 OK
256 OK
68/68 pass correctness test
优化就没有做了,可以从CPU流水线特性考虑优化策略
总结
这次的实验让我对CPU的取指过程有了更深了解,自己尝试写汇编代码,刚开始也是会报很多错,然后又回去看代码,最后成功解决,锻炼了肉眼debug能力哈哈