CSAPP--Architecture Lab实验记录

CSAPP–Architecture Lab实验记录

实验准备

实验资料

  • 《深入理解计算机系统》第四章 处理器体系结构
  • lab官网

环境搭建

博主用的是 linux (ubuntu20.04)

1、首先下载实验资料

$ wget http://csapp.cs.cmu.edu/3e/archlab-handout.tar

2、解压

$ tar xvf archlab-handout.tar
$ cd archlab-handout
$ tar xvf sim.tar

因为本套实验依赖 tcl/tk、flex 和 bison,我们需要安装这几个软件:

$ sudo apt install tcl tcl-dev tk tk-dev 
$ sudo apt-get install bison flex

因为 Makefile 里写的 tcl 的版本已经比较老了,我们需要修改一下 Makefile:

$ sed -i "s/tcl8.5/tcl8.6/g" Makefile
$ sed -i "s/CFLAGS=/CFLAGS=-DUSE_INTERP_RESULT /g" Makefile

3、执行编译:

$ cd sim
$ make clean; make

实验内容

Part A

这个部分在 sim/misc 这个文件夹里完成。你的任务就是写 3 个 Y86-64 程序并且模拟运行它们。这 3 个程序要实现的功能在 sim/misc/examples.c 里面。

examples.c内容:

/*
 * Architecture Lab: Part A
 *
 * High level specs for the functions that the students will rewrite
 * in Y86-64 assembly language
 */

/* $begin examples */
/* linked list element */
typedef struct ELE {
    long val;           /* value stored in this node */
    struct ELE *next;   /* following node; the list functions below stop at a null pointer */
} *list_ptr;            /* list_ptr is a pointer to struct ELE */

/*
 * sum_list - Iteratively add up the val field of every node in a list.
 *
 * ls: first node of the list, or a null pointer for an empty list.
 * Returns the sum of all val fields (0 for an empty list).
 */
long sum_list(list_ptr ls)
{
    long total = 0;

    /* Advance along the next pointers until the null terminator. */
    for (; ls; ls = ls->next)
        total += ls->val;

    return total;
}

/*
 * rsum_list - Recursive counterpart of sum_list.
 *
 * ls: first node of the list, or a null pointer for an empty list.
 * Returns this node's val plus the sum of the remainder of the list;
 * an empty list yields 0.
 */
long rsum_list(list_ptr ls)
{
    long head;
    long tail;

    /* Base case: nothing left to add. */
    if (!ls)
        return 0;

    /* Read val before recursing, then sum the rest of the list. */
    head = ls->val;
    tail = rsum_list(ls->next);
    return head + tail;
}

/*
 * copy_block - Copy len words from src to dest.
 *
 * src:  first word to read.
 * dest: first word to write.
 * len:  number of words to copy; nothing is copied when len <= 0.
 * Returns the XOR of all copied words (0 when nothing is copied).
 */
long copy_block(long *src, long *dest, long len)
{
    long checksum = 0;
    long i;

    for (i = 0; i < len; i++) {
        long word = src[i];

        dest[i] = word;
        checksum ^= word;
    }
    return checksum;
}
/* $end examples */

使用 YAS 将相应的程序汇编成二进制,然后再把生成的二进制放到指令集模拟器 YIS 上运行。

下面是示例链表的数据(一个链表,共三个元素):

# Sample linked list
        .align 8
ele1:
        .quad 0x00a
        .quad ele2
ele2:
        .quad 0x0b0
        .quad ele3
ele3:
        .quad 0xc00
        .quad 0

sum.ys: 迭代计算链表元素和

Write a Y86-64 program sum.ys that iteratively sums the elements of a linked list. Your program should consist of some code that sets up the stack structure, invokes a function, and then halts. In this case, the function should be Y86-64 code for a function (sum_list) that is functionally equivalent to the C sum_list function in Figure 1.

# sum.ys by linxc
# Iteratively sum the elements of a linked list (Y86-64).
# Execution begins at address 0

        .pos 0
        irmovq stack, %rsp      # set up the stack pointer
        call main               # run the test driver
        halt                    # stop; the result is left in %rax

# Sample linked list
        .align 8
ele1:
        .quad 0x00a
        .quad ele2
ele2:
        .quad 0x0b0
        .quad ele3
ele3:
        .quad 0xc00
        .quad 0

# main - call sum_list on the sample list
main:
        irmovq ele1, %rdi       # argument: ls = &ele1
        call sum_list           # %rax = sum_list(ls)
        ret

# long sum_list(list_ptr ls)
# In:       %rdi = ls (0 for an empty list)
# Out:      %rax = sum of the val fields of all nodes
# Clobbers: %rsi, %rdi, condition codes
sum_list:
        xorq %rax, %rax         # val = 0
        jmp test                # test ls before the first iteration
loop:
        mrmovq (%rdi), %rsi     # %rsi = ls->val
        addq %rsi, %rax         # val += ls->val
        mrmovq 8(%rdi), %rdi    # ls = ls->next
test:
        andq %rdi, %rdi         # set condition codes from ls
        jne loop                # keep going while ls != 0
        ret

# The stack grows toward lower addresses from this label, so it must be
# placed well above the code. Without the .pos directive the label would
# sit directly after the last instruction and the first call would
# overwrite the program with its return address.
        .pos 0x100
stack:


接下来用 YAS 和 YIS 进行汇编并模拟运行,结果:

$ ./yas sum.ys
$ ./yis sum.yo
Stopped in 26 steps at PC = 0x13.  Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax:   0x0000000000000000      0x0000000000000cba
%rsp:   0x0000000000000000      0x0000000000000100
%rsi:   0x0000000000000000      0x0000000000000c00

Changes to memory:
0x00f0: 0x0000000000000000      0x000000000000005b
0x00f8: 0x0000000000000000      0x0000000000000013

可以看到 %rax 的值就是标号 ele1,ele2,ele3 处三个元素的和 0xcba,并且可以看到部分寄存器和部分内存地址的值也发生了改变。

rsum.ys: 递归计算链表元素和

写一个类似的 Y86-64 程序 rsum.ys,递归地计算链表元素的和,链表数据与上面相同。

# rsum_list.ys by linxc
# Recursively sum the elements of a linked list (Y86-64).
# Execution begins at address 0

        .pos 0
        irmovq stack, %rsp      # set up the stack pointer
        call main               # run the test driver
        halt                    # stop; the result is left in %rax

# Sample linked list
        .align 8
ele1:
        .quad 0x00a
        .quad ele2
ele2:
        .quad 0x0b0
        .quad ele3
ele3:
        .quad 0xc00
        .quad 0

# main - call rsum_list on the sample list
main:
        irmovq ele1, %rdi       # argument: ls = &ele1
        call rsum_list          # %rax = rsum_list(ls)
        ret

# long rsum_list(list_ptr ls)
# In:       %rdi = ls (0 for an empty list)
# Out:      %rax = sum of the val fields of all nodes
# Clobbers: %rdi, condition codes (%r12 is saved and restored)
rsum_list:
        pushq %r12              # %r12 holds ls->val across the recursive call
        xorq %rax, %rax         # result for the empty list is 0
        andq %rdi, %rdi         # set condition codes from ls
        je done                 # ls == 0: return 0
        mrmovq (%rdi), %r12     # %r12 = ls->val
        mrmovq 8(%rdi), %rdi    # ls = ls->next
        call rsum_list          # %rax = rsum_list(ls->next)
        addq %r12, %rax         # %rax = val + rest
done:
        popq %r12               # restore the caller's %r12
        ret

# The stack grows toward lower addresses from here.
        .pos 0x100
stack:

结果:

$ ./yas rsum.ys
$  ./yis rsum.yo
Stopped in 42 steps at PC = 0x13.  Status 'HLT', CC Z=0 S=0 O=0
Changes to registers:
%rax:   0x0000000000000000      0x0000000000000cba
%rsp:   0x0000000000000000      0x0000000000000100

Changes to memory:
0x00b8: 0x0000000000000000      0x0000000000000c00
0x00c0: 0x0000000000000000      0x0000000000000088
0x00c8: 0x0000000000000000      0x00000000000000b0
0x00d0: 0x0000000000000000      0x0000000000000088
0x00d8: 0x0000000000000000      0x000000000000000a
0x00e0: 0x0000000000000000      0x0000000000000088
0x00f0: 0x0000000000000000      0x000000000000005b
0x00f8: 0x0000000000000000      0x0000000000000013

copy.ys: 复制函数

将内存中的一块数据拷贝到另一个不重叠的内存位置,并计算被拷贝数据的 checksum(Xor)。对应的 C 语言代码见上文 examples.c 中的 copy_block,Y86-64 实现如下:

# copy.ys by linxc
# Copy a block of words and compute the XOR checksum of the source (Y86-64).
# Execution begins at address 0

        .pos 0
        irmovq stack, %rsp      # set up the stack pointer
        call main               # run the test driver
        halt                    # stop; the checksum is left in %rax

        .align 8
# Source block
src:
        .quad 0x00a
        .quad 0x0b0
        .quad 0xc00
# Destination block
dest:
        .quad 0x111
        .quad 0x222
        .quad 0x333

# main - copy the three words at src to dest
main:
        irmovq src, %rdi        # argument 1: src
        irmovq dest, %rsi       # argument 2: dest
        irmovq $3, %rdx         # argument 3: len = 3 words
        call copy_block         # %rax = copy_block(src, dest, 3)
        ret

# long copy_block(long *src, long *dest, long len)
# In:       %rdi = src, %rsi = dest, %rdx = len (number of 8-byte words)
# Out:      %rax = XOR of all copied words; 0 when len <= 0
# Clobbers: %rdi, %rsi, %rdx, condition codes
#           (%r8, %r9 and %r12 are saved and restored)
copy_block:
        pushq %r8
        pushq %r9
        pushq %r12
        irmovq $1, %r9          # constant 1: loop decrement
        irmovq $8, %r12         # constant 8: size of one word
        xorq %rax, %rax         # result = 0
        jmp test                # test len before the first iteration
loop:
        mrmovq 0(%rdi), %r8     # val = *src
        addq %r12, %rdi         # src++
        rmmovq %r8, (%rsi)      # *dest = val
        addq %r12, %rsi         # dest++
        xorq %r8, %rax          # result ^= val
        subq %r9, %rdx          # len--
test:
        andq %rdx, %rdx         # set condition codes from len
        jg loop                 # loop only while len > 0, as in the C version;
                                # jne would never stop for a negative len
        popq %r12
        popq %r9
        popq %r8
        ret

# The stack grows toward lower addresses from here.
        .pos 0x100
stack:

运行结果如下:

$ ./yas copy.ys
$ ./yis copy.yo
Stopped in 45 steps at PC = 0x13.  Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax:   0x0000000000000000      0x0000000000000cba
%rsp:   0x0000000000000000      0x0000000000000100
%rsi:   0x0000000000000000      0x0000000000000048
%rdi:   0x0000000000000000      0x0000000000000030

Changes to memory:
0x0030: 0x0000000000000111      0x000000000000000a
0x0038: 0x0000000000000222      0x00000000000000b0
0x0040: 0x0000000000000333      0x0000000000000c00
0x00f0: 0x0000000000000000      0x000000000000006f
0x00f8: 0x0000000000000000      0x0000000000000013

Part B

这部分在目录 sim/seq 里完成。要求为处理器增加一条 iaddq 指令:通过修改 seq-full.hcl 文件,使其支持 iaddq 指令。

按照提示,我们可以参考书本 p266 图 4-18 中 irmovq 和 OPq 指令的计算顺序,写出 iaddq 指令各阶段的计算:

  1. 取指:icode:ifun ← M1[PC];rA:rB ← M1[PC+1];valC ← M8[PC+2];valP ← PC+10
  2. 译码:valB ← R[rB]
  3. 执行:valE ← valB + valC;设置条件码 CC
  4. 访存:无
  5. 写回:R[rB] ← valE
  6. 更新PC:PC ← valP

按照上面的分析修改 seq-full.hcl,修改后的文件内容如下:

#/* $begin seq-all-hcl */
####################################################################
#  HCL Description of Control for Single Cycle Y86-64 Processor SEQ   #
#  Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2010       #
####################################################################

## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work

####################################################################
#    C Include's.  Don't alter these                               #
####################################################################

quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'word_t gen_pc(){return 0;}'
quote 'int main(int argc, char *argv[])'
quote '  {plusmode=0;return sim_main(argc,argv);}'

####################################################################
#    Declarations.  Do not change/remove/delete any of these       #
####################################################################

##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP    'I_NOP'
wordsig IHALT   'I_HALT'
wordsig IRRMOVQ 'I_RRMOVQ'
wordsig IIRMOVQ 'I_IRMOVQ'
wordsig IRMMOVQ 'I_RMMOVQ'
wordsig IMRMOVQ 'I_MRMOVQ'
wordsig IOPQ    'I_ALU'
wordsig IJXX    'I_JMP'
wordsig ICALL   'I_CALL'
wordsig IRET    'I_RET'
wordsig IPUSHQ  'I_PUSHQ'
wordsig IPOPQ   'I_POPQ'
# Instruction code for iaddq instruction
wordsig IIADDQ  'I_IADDQ'

##### Symbolic represenations of Y86-64 function codes                  #####
wordsig FNONE    'F_NONE'        # Default function code

##### Symbolic representation of Y86-64 Registers referenced explicitly #####
wordsig RRSP     'REG_RSP'      # Stack Pointer
wordsig RNONE    'REG_NONE'     # Special value indicating "no register"

##### ALU Functions referenced explicitly                            #####
wordsig ALUADD  'A_ADD'         # ALU should add its arguments

##### Possible instruction status values                             #####
wordsig SAOK    'STAT_AOK'      # Normal execution
wordsig SADR    'STAT_ADR'      # Invalid memory address
wordsig SINS    'STAT_INS'      # Invalid instruction
wordsig SHLT    'STAT_HLT'      # Halt instruction encountered

##### Signals that can be referenced by control logic ####################

##### Fetch stage inputs                #####
wordsig pc 'pc'                         # Program counter
##### Fetch stage computations          #####
wordsig imem_icode 'imem_icode'         # icode field from instruction memory
wordsig imem_ifun  'imem_ifun'          # ifun field from instruction memory
wordsig icode     'icode'               # Instruction control code
wordsig ifun      'ifun'                # Instruction function
wordsig rA        'ra'                  # rA field from instruction
wordsig rB        'rb'                  # rB field from instruction
wordsig valC      'valc'                # Constant from instruction
wordsig valP      'valp'                # Address of following instruction
boolsig imem_error 'imem_error'         # Error signal from instruction memory
boolsig instr_valid 'instr_valid'       # Is fetched instruction valid?

##### Decode stage computations         #####
wordsig valA    'vala'                  # Value from register A port
wordsig valB    'valb'                  # Value from register B port

##### Execute stage computations        #####
wordsig valE    'vale'                  # Value computed by ALU
boolsig Cnd     'cond'                  # Branch test

##### Memory stage computations         #####
wordsig valM    'valm'                  # Value read from memory
boolsig dmem_error 'dmem_error'         # Error signal from data memory


####################################################################
#    Control Signal Definitions.                                   #
####################################################################

################ Fetch Stage     ###################################

# Determine instruction code
# An instruction-memory error is mapped to INOP instead of the fetched icode.
word icode = [
        imem_error: INOP;
        1: imem_icode;          # Default: get from instruction memory
];

# Determine instruction function
# An instruction-memory error is mapped to FNONE instead of the fetched ifun.
word ifun = [
        imem_error: FNONE;
        1: imem_ifun;           # Default: get from instruction memory
];

# Is the fetched icode one of the supported instructions?
# IIADDQ is listed so that iaddq is accepted instead of raising SINS.
bool instr_valid = icode in
        { INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
               IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };

# Does fetched instruction require a regid byte?
# IIADDQ is listed because iaddq names its operand register in rB.
bool need_regids =
        icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ,
                     IIRMOVQ, IRMMOVQ, IMRMOVQ,IIADDQ };

# Does fetched instruction require a constant word?
# IIADDQ is listed because iaddq carries its immediate operand in valC.
bool need_valC =
        icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL,IIADDQ };

################ Decode Stage    ###################################

## What register should be used as the A source?
## IIADDQ is not listed: its ALU A input is the constant valC (see aluA),
## not a register value.
word srcA = [
        icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : rA;
        icode in { IPOPQ, IRET } : RRSP;
        1 : RNONE; # Don't need register
];

## What register should be used as the B source?
## IIADDQ reads rB, the register the constant is added to.
word srcB = [
        icode in { IOPQ, IRMMOVQ, IMRMOVQ,IIADDQ  } : rB;
        icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
        1 : RNONE;  # Don't need register
];

## What register should be used as the E destination?
## IRRMOVQ writes rB only when Cnd holds; otherwise it reaches the RNONE default.
## IIADDQ writes its ALU result back to rB.
word dstE = [
        icode in { IRRMOVQ } && Cnd : rB;
        icode in { IIRMOVQ, IOPQ,IIADDQ } : rB;
        icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
        1 : RNONE;  # Don't write any register
];

## What register should be used as the M destination?
## IIADDQ is not listed: it reads nothing from memory.
word dstM = [
        icode in { IMRMOVQ, IPOPQ } : rA;
        1 : RNONE;  # Don't write any register
];

################ Execute Stage   ###################################

## Select input A to ALU
## IIADDQ feeds the instruction constant valC into the ALU.
word aluA = [
        icode in { IRRMOVQ, IOPQ } : valA;
        icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ,IIADDQ  } : valC;
        icode in { ICALL, IPUSHQ } : -8;
        icode in { IRET, IPOPQ } : 8;
        # Other instructions don't need ALU
];

## Select input B to ALU
## IIADDQ supplies valB (the register selected by srcB) as the second operand.
word aluB = [
        icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
                      IPUSHQ, IRET, IPOPQ,IIADDQ  } : valB;
        icode in { IRRMOVQ, IIRMOVQ } : 0;
        # Other instructions don't need ALU
];

## Set the ALU function
## Only IOPQ takes its operation from ifun; every other instruction,
## IIADDQ included, uses the default ALUADD.
word alufun = [
        icode == IOPQ : ifun;
        1 : ALUADD;
];

## Should the condition codes be updated?
## iaddq is an arithmetic instruction and must set the condition codes just
## like the OPq instructions. Leaving IIADDQ out makes the iaddq-based
## regression tests (optest/jtest run with TFLAGS=-i) fail.
bool set_cc = icode in { IOPQ, IIADDQ };

################ Memory Stage    ###################################

## Set read control signal
## IIADDQ appears in neither mem_read nor mem_write: it does not access data memory.
bool mem_read = icode in { IMRMOVQ, IPOPQ, IRET };

## Set write control signal
bool mem_write = icode in { IRMMOVQ, IPUSHQ, ICALL };

## Select memory address
## Instructions in the first case use the ALU result valE as the address;
## IPOPQ and IRET use valA, which srcA selects as RRSP for them.
word mem_addr = [
        icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : valE;
        icode in { IPOPQ, IRET } : valA;
        # Other instructions don't need address
];

## Select memory input data
word mem_data = [
        # Value from register
        icode in { IRMMOVQ, IPUSHQ } : valA;
        # Return PC
        icode == ICALL : valP;
        # Default: Don't write anything
];

## Determine instruction status
## Memory errors map to SADR, an invalid instruction to SINS, halt to SHLT;
## the final case is the default SAOK.
word Stat = [
        imem_error || dmem_error : SADR;
        !instr_valid: SINS;
        icode == IHALT : SHLT;
        1 : SAOK;
];

################ Program Counter Update ############################

## What address should instruction be fetched at
## IIADDQ matches none of the control-transfer cases below, so it takes the
## default case and continues at valP.

word new_pc = [
        # Call.  Use instruction constant
        icode == ICALL : valC;
        # Taken branch.  Use instruction constant
        icode == IJXX && Cnd : valC;
        # Completion of RET instruction.  Use value from stack
        icode == IRET : valM;
        # Default: Use incremented PC
        1 : valP;
];
#/* $end seq-all-hcl */

接下来,检验一下修改对不对:

$ make VERSION=full
# Building the seq-full.hcl version of SEQ
../misc/hcl2c -n seq-full.hcl <seq-full.hcl >seq-full.c
gcc -Wall -O2 -isystem /usr/include/tcl8.5 -I../misc -DHAS_GUI -o ssim \
        seq-full.c ssim.c ../misc/isa.c -L/usr/lib -ltk -ltcl -lm
ssim.c:20:10: fatal error: tk.h: No such file or directory
   20 | #include <tk.h>
      |          ^~~~~~
compilation terminated.
make: *** [Makefile:44: ssim] Error 1

......

$ make VERSION=full
/usr/bin/ld: /tmp/ccbOoipJ.o:(.data.rel+0x0): undefined reference to `matherr'
collect2: error: ld returned 1 exit status
make: *** [Makefile:44: ssim] Error 1

我在执行 make VERSION=full 命令的时候,先后遇到了上面的两个错误。对于第一个错误,和一开始搭建环境时一样,只需执行下面两条命令:

$ sed -i "s/tcl8.5/tcl8.6/g" Makefile
$ sed -i "s/CFLAGS=/CFLAGS=-DUSE_INTERP_RESULT /g" Makefile 

对于第二个错误,undefined reference to matherr。我们可以在ssim.c找到matherr,然后注释掉那两行,因为没有用到。

修改完后,重新执行:

$  make VERSION=full
$ (cd ../ptest; make SIM=../seq/ssim TFLAGS=-i)
./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
Test op-iaddq-256-rdx failed
Test op-iaddq-4-rdx failed
Test op-iaddq-256-rbx failed
Test op-iaddq-4-rbx failed
Test op-iaddq-256-rsp failed
Test op-iaddq-4-rsp failed
  6/58 ISA Checks Failed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
Test ji-jle-64-32 failed
Test ji-jl-32-64 failed
Test ji-je-32-64 failed
Test ji-je-64-32 failed
Test ji-jne-32-64 failed
Test ji-jne-64-32 failed
Test ji-jge-32-64 failed
Test ji-jg-64-32 failed
  8/96 ISA Checks Failed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 756 ISA Checks Succeed

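注意:只有四组测试(optest、jtest、ctest、htest)全部显示 All … ISA Checks Succeed 才算成功,最后一组应为 All 756 ISA Checks Succeed。上面的输出中 optest 有 6 项、jtest 有 8 项失败,且都与 iaddq 有关,这通常说明 iaddq 没有设置条件码,即 set_cc 中需要包含 IIADDQ。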

Part C

这里我们要做的就是修改 sim/pipe/pipe-full.hcl 以及 sim/pipe/ncopy.ys 的内容,使我们的程序运行效率尽量高。在为 pipe-full.hcl 实现完 iaddq 之后,我们就可以分别使用如下指令测试我们的代码:

$ ./correctness.pl #结果是否正确
$ ./benchmark.pl #得出效率,分数越高结果越好

修改之后的pipe-full.hcl文件:

#/* $begin pipe-all-hcl */
####################################################################
#    HCL Description of Control for Pipelined Y86-64 Processor     #
#    Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2014     #
####################################################################

## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work

####################################################################
#    C Include's.  Don't alter these                               #
####################################################################

quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "pipeline.h"'
quote '#include "stages.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'int main(int argc, char *argv[]){return sim_main(argc,argv);}'

####################################################################
#    Declarations.  Do not change/remove/delete any of these       #
####################################################################

##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP    'I_NOP'
wordsig IHALT   'I_HALT'
wordsig IRRMOVQ 'I_RRMOVQ'
wordsig IIRMOVQ 'I_IRMOVQ'
wordsig IRMMOVQ 'I_RMMOVQ'
wordsig IMRMOVQ 'I_MRMOVQ'
wordsig IOPQ    'I_ALU'
wordsig IJXX    'I_JMP'
wordsig ICALL   'I_CALL'
wordsig IRET    'I_RET'
wordsig IPUSHQ  'I_PUSHQ'
wordsig IPOPQ   'I_POPQ'
# Instruction code for iaddq instruction
wordsig IIADDQ  'I_IADDQ'

##### Symbolic represenations of Y86-64 function codes            #####
wordsig FNONE    'F_NONE'        # Default function code

##### Symbolic representation of Y86-64 Registers referenced      #####
wordsig RRSP     'REG_RSP'           # Stack Pointer
wordsig RNONE    'REG_NONE'          # Special value indicating "no register"

##### ALU Functions referenced explicitly ##########################
wordsig ALUADD  'A_ADD'              # ALU should add its arguments

##### Possible instruction status values                       #####
wordsig SBUB    'STAT_BUB'      # Bubble in stage
wordsig SAOK    'STAT_AOK'      # Normal execution
wordsig SADR    'STAT_ADR'      # Invalid memory address
wordsig SINS    'STAT_INS'      # Invalid instruction
wordsig SHLT    'STAT_HLT'      # Halt instruction encountered

##### Signals that can be referenced by control logic ##############

##### Pipeline Register F ##########################################

wordsig F_predPC 'pc_curr->pc'       # Predicted value of PC

##### Intermediate Values in Fetch Stage ###########################

wordsig imem_icode  'imem_icode'      # icode field from instruction memory
wordsig imem_ifun   'imem_ifun'       # ifun  field from instruction memory
wordsig f_icode 'if_id_next->icode'  # (Possibly modified) instruction code
wordsig f_ifun  'if_id_next->ifun'   # Fetched instruction function
wordsig f_valC  'if_id_next->valc'   # Constant data of fetched instruction
wordsig f_valP  'if_id_next->valp'   # Address of following instruction
boolsig imem_error 'imem_error'      # Error signal from instruction memory
boolsig instr_valid 'instr_valid'    # Is fetched instruction valid?

##### Pipeline Register D ##########################################
wordsig D_icode 'if_id_curr->icode'   # Instruction code
wordsig D_rA 'if_id_curr->ra'        # rA field from instruction
wordsig D_rB 'if_id_curr->rb'        # rB field from instruction
wordsig D_valP 'if_id_curr->valp'     # Incremented PC

##### Intermediate Values in Decode Stage  #########################

wordsig d_srcA   'id_ex_next->srca'  # srcA from decoded instruction
wordsig d_srcB   'id_ex_next->srcb'  # srcB from decoded instruction
wordsig d_rvalA 'd_regvala'          # valA read from register file
wordsig d_rvalB 'd_regvalb'          # valB read from register file

##### Pipeline Register E ##########################################
wordsig E_icode 'id_ex_curr->icode'   # Instruction code
wordsig E_ifun  'id_ex_curr->ifun'    # Instruction function
wordsig E_valC  'id_ex_curr->valc'    # Constant data
wordsig E_srcA  'id_ex_curr->srca'    # Source A register ID
wordsig E_valA  'id_ex_curr->vala'    # Source A value
wordsig E_srcB  'id_ex_curr->srcb'    # Source B register ID
wordsig E_valB  'id_ex_curr->valb'    # Source B value
wordsig E_dstE 'id_ex_curr->deste'    # Destination E register ID
wordsig E_dstM 'id_ex_curr->destm'    # Destination M register ID

##### Intermediate Values in Execute Stage #########################
wordsig e_valE 'ex_mem_next->vale'      # valE generated by ALU
boolsig e_Cnd 'ex_mem_next->takebranch' # Does condition hold?
wordsig e_dstE 'ex_mem_next->deste'      # dstE (possibly modified to be RNONE)

##### Pipeline Register M                  #########################
wordsig M_stat 'ex_mem_curr->status'     # Instruction status
wordsig M_icode 'ex_mem_curr->icode'    # Instruction code
wordsig M_ifun  'ex_mem_curr->ifun'     # Instruction function
wordsig M_valA  'ex_mem_curr->vala'      # Source A value
wordsig M_dstE 'ex_mem_curr->deste'     # Destination E register ID
wordsig M_valE  'ex_mem_curr->vale'      # ALU E value
wordsig M_dstM 'ex_mem_curr->destm'     # Destination M register ID
boolsig M_Cnd 'ex_mem_curr->takebranch' # Condition flag
boolsig dmem_error 'dmem_error'         # Error signal from instruction memory

##### Intermediate Values in Memory Stage ##########################
wordsig m_valM 'mem_wb_next->valm'      # valM generated by memory
wordsig m_stat 'mem_wb_next->status'    # stat (possibly modified to be SADR)

##### Pipeline Register W ##########################################
wordsig W_stat 'mem_wb_curr->status'     # Instruction status
wordsig W_icode 'mem_wb_curr->icode'    # Instruction code
wordsig W_dstE 'mem_wb_curr->deste'     # Destination E register ID
wordsig W_valE  'mem_wb_curr->vale'      # ALU E value
wordsig W_dstM 'mem_wb_curr->destm'     # Destination M register ID
wordsig W_valM  'mem_wb_curr->valm'     # Memory M value

####################################################################
#    Control Signal Definitions.                                   #
####################################################################

################ Fetch Stage     ###################################

## What address should instruction be fetched at
## The mispredicted-branch and ret cases override the predicted PC held in
## pipeline register F.
word f_pc = [
        # Mispredicted branch.  Fetch at incremented PC
        M_icode == IJXX && !M_Cnd : M_valA;
        # Completion of RET instruction
        W_icode == IRET : W_valM;
        # Default: Use predicted value of PC
        1 : F_predPC;
];

## Determine icode of fetched instruction
## An instruction-memory error is mapped to INOP.
word f_icode = [
        imem_error : INOP;
        1: imem_icode;
];

# Determine ifun
# An instruction-memory error is mapped to FNONE.
word f_ifun = [
        imem_error : FNONE;
        1: imem_ifun;
];

# Is instruction valid?
# IIADDQ is listed so that iaddq is accepted instead of raising SINS.
bool instr_valid = f_icode in
        { INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
          IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ ,IIADDQ };

# Determine status code for fetched instruction
word f_stat = [
        imem_error: SADR;
        !instr_valid : SINS;
        f_icode == IHALT : SHLT;
        1 : SAOK;
];

# Does fetched instruction require a regid byte?
# IIADDQ is listed because iaddq names its operand register in rB.
bool need_regids =
        f_icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ,
                     IIRMOVQ, IRMMOVQ, IMRMOVQ,IIADDQ };

# Does fetched instruction require a constant word?
# IIADDQ is listed because iaddq carries its immediate operand in valC.
bool need_valC =
        f_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL,IIADDQ };

# Predict next value of PC
# Jumps and calls are predicted to go to valC; every other instruction,
# IIADDQ included, is predicted to continue at valP.
word f_predPC = [
        f_icode in { IJXX, ICALL } : f_valC;
        1 : f_valP;
];

################ Decode Stage ######################################


## What register should be used as the A source?
## IIADDQ is not listed: its ALU A input is the constant E_valC (see aluA),
## not a register value.
word d_srcA = [
        D_icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : D_rA;
        D_icode in { IPOPQ, IRET } : RRSP;
        1 : RNONE; # Don't need register
];

## What register should be used as the B source?
## iaddq adds its constant to rB, so IIADDQ must read rB here. Without it
## d_srcB is RNONE for iaddq: the forwarding logic in d_valB and the
## load/use hazard test (both keyed on d_srcB) never see the operand, and
## the ALU adds the constant to a stale value.
word d_srcB = [
        D_icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ } : D_rB;
        D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
        1 : RNONE;  # Don't need register
];

## What register should be used as the E destination?
## iaddq writes its ALU result back to rB, so IIADDQ must be listed here.
## Without it d_dstE is RNONE and the result of iaddq is silently dropped.
## (A not-taken conditional move is cancelled later, in e_dstE.)
word d_dstE = [
        D_icode in { IRRMOVQ, IIRMOVQ, IOPQ, IIADDQ } : D_rB;
        D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
        1 : RNONE;  # Don't write any register
];

## What register should be used as the M destination?
## IIADDQ is not listed: it reads nothing from memory.
word d_dstM = [
        D_icode in { IMRMOVQ, IPOPQ } : D_rA;
        1 : RNONE;  # Don't write any register
];

## What should be the A value?
## Forward into decode stage for valA
## The forwarding sources are listed from the execute stage back to write back;
## the register-file value is the default.
word d_valA = [
        D_icode in { ICALL, IJXX } : D_valP; # Use incremented PC
        d_srcA == e_dstE : e_valE;    # Forward valE from execute
        d_srcA == M_dstM : m_valM;    # Forward valM from memory
        d_srcA == M_dstE : M_valE;    # Forward valE from memory
        d_srcA == W_dstM : W_valM;    # Forward valM from write back
        d_srcA == W_dstE : W_valE;    # Forward valE from write back
        1 : d_rvalA;  # Use value read from register file
];

## What should be the B value?
## Forward into decode stage for valB, using the same sources as d_valA.
## The comparisons are against d_srcB, so an instruction only receives a
## forwarded B operand if d_srcB names the register it reads.
word d_valB = [
        d_srcB == e_dstE : e_valE;    # Forward valE from execute
        d_srcB == M_dstM : m_valM;    # Forward valM from memory
        d_srcB == M_dstE : M_valE;    # Forward valE from memory
        d_srcB == W_dstM : W_valM;    # Forward valM from write back
        d_srcB == W_dstE : W_valE;    # Forward valE from write back
        1 : d_rvalB;  # Use value read from register file
];

################ Execute Stage #####################################

## Select input A to ALU
## IIADDQ feeds the instruction constant E_valC into the ALU.
word aluA = [
        E_icode in { IRRMOVQ, IOPQ } : E_valA;
        E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC;
        E_icode in { ICALL, IPUSHQ } : -8;
        E_icode in { IRET, IPOPQ } : 8;
        # Other instructions don't need ALU
];

## Select input B to ALU
## IIADDQ supplies E_valB as the second operand.
word aluB = [
        E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
                     IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
        E_icode in { IRRMOVQ, IIRMOVQ } : 0;
        # Other instructions don't need ALU
];

## Set the ALU function
## Only IOPQ takes its operation from E_ifun; every other instruction,
## IIADDQ included, uses the default ALUADD.
word alufun = [
        E_icode == IOPQ : E_ifun;
        1 : ALUADD;
];

## Should the condition codes be updated?
## iaddq is an arithmetic instruction and must set the condition codes just
## like the OPq instructions, so IIADDQ is accepted alongside IOPQ.
bool set_cc = E_icode in { IOPQ, IIADDQ } &&
        # State changes only during normal operation
        !m_stat in { SADR, SINS, SHLT } && !W_stat in { SADR, SINS, SHLT };

## Generate valA in execute stage
word e_valA = E_valA;    # Pass valA through stage

## Set dstE to RNONE in event of not-taken conditional move
## Every other instruction keeps the E_dstE chosen in the decode stage.
word e_dstE = [
        E_icode == IRRMOVQ && !e_Cnd : RNONE;
        1 : E_dstE;
];

################ Memory Stage ######################################

## Select memory address
## Instructions in the first case use the ALU result M_valE as the address;
## IPOPQ and IRET use M_valA.
word mem_addr = [
        M_icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : M_valE;
        M_icode in { IPOPQ, IRET } : M_valA;
        # Other instructions don't need address
];

## Set read control signal
## IIADDQ appears in neither mem_read nor mem_write: it does not access data memory.
bool mem_read = M_icode in { IMRMOVQ, IPOPQ, IRET };

## Set write control signal
bool mem_write = M_icode in { IRMMOVQ, IPUSHQ, ICALL };

#/* $begin pipe-m_stat-hcl */
## Update the status
## A data-memory error replaces the status carried in pipeline register M with SADR.
word m_stat = [
        dmem_error : SADR;
        1 : M_stat;
];
#/* $end pipe-m_stat-hcl */

## Set E port register ID
word w_dstE = W_dstE;

## Set E port value
word w_valE = W_valE;

## Set M port register ID
word w_dstM = W_dstM;

## Set M port value
word w_valM = W_valM;

## Update processor status
## A bubble in the write-back stage is reported as SAOK.
word Stat = [
        W_stat == SBUB : SAOK;
        1 : W_stat;
];

################ Pipeline Register Control #########################

# Should I stall or inject a bubble into Pipeline Register F?
# At most one of these can be true.
# The load/use test below (repeated for D_stall, D_bubble and E_bubble)
# matches a load in the execute stage (IMRMOVQ or IPOPQ) whose E_dstM equals
# d_srcA or d_srcB. It therefore relies on d_srcA/d_srcB naming every
# register the instruction in decode reads.
bool F_bubble = 0;
bool F_stall =
        # Conditions for a load/use hazard
        E_icode in { IMRMOVQ, IPOPQ } &&
         E_dstM in { d_srcA, d_srcB } ||
        # Stalling at fetch while ret passes through pipeline
        IRET in { D_icode, E_icode, M_icode };

# Should I stall or inject a bubble into Pipeline Register D?
# At most one of these can be true.
bool D_stall =
        # Conditions for a load/use hazard
        E_icode in { IMRMOVQ, IPOPQ } &&
         E_dstM in { d_srcA, d_srcB };

# D_bubble excludes the load/use case explicitly, which keeps it from being
# true at the same time as D_stall.
bool D_bubble =
        # Mispredicted branch
        (E_icode == IJXX && !e_Cnd) ||
        # Stalling at fetch while ret passes through pipeline
        # but not condition for a load/use hazard
        !(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
          IRET in { D_icode, E_icode, M_icode };

# Should I stall or inject a bubble into Pipeline Register E?
# At most one of these can be true.
bool E_stall = 0;
bool E_bubble =
        # Mispredicted branch
        (E_icode == IJXX && !e_Cnd) ||
        # Conditions for a load/use hazard
        E_icode in { IMRMOVQ, IPOPQ } &&
         E_dstM in { d_srcA, d_srcB};

# Should I stall or inject a bubble into Pipeline Register M?
# At most one of these can be true.
bool M_stall = 0;
# Start injecting bubbles as soon as exception passes through memory stage
bool M_bubble = m_stat in { SADR, SINS, SHLT } || W_stat in { SADR, SINS, SHLT };

# Should I stall or inject a bubble into Pipeline Register W?
# W is stalled once an instruction with status SADR, SINS or SHLT reaches write back.
bool W_stall = W_stat in { SADR, SINS, SHLT };
bool W_bubble = 0;
#/* $end pipe-all-hcl */

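测试一下 ncopy.ys 的正确性(注意:从输出的第一行可以看到,correctness.pl 默认使用指令集模拟器 yis,并不会运行流水线模拟器;要验证 pipe-full.hcl 中 iaddq 的实现,还需要运行 ./correctness.pl -p 以及 (cd ../ptest; make SIM=../pipe/psim TFLAGS=-i)):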

$ ./correctness.pl
Simulating with instruction set simulator yis
        ncopy
0       OK
1       OK
2       OK
3       OK
4       OK
5       OK
6       OK
7       OK
8       OK
9       OK
10      OK
11      OK
12      OK
13      OK
14      OK
15      OK
16      OK
17      OK
18      OK
19      OK
20      OK
21      OK
22      OK
23      OK
24      OK
25      OK
26      OK
27      OK
28      OK
29      OK
30      OK
31      OK
32      OK
33      OK
34      OK
35      OK
36      OK
37      OK
38      OK
39      OK
40      OK
41      OK
42      OK
43      OK
44      OK
45      OK
46      OK
47      OK
48      OK
49      OK
50      OK
51      OK
52      OK
53      OK
54      OK
55      OK
56      OK
57      OK
58      OK
59      OK
60      OK
61      OK
62      OK
63      OK
64      OK
128     OK
192     OK
256     OK
68/68 pass correctness test

优化部分这次没有做,可以从 CPU 流水线的特性出发考虑优化策略(例如循环展开、用 iaddq 减少指令数、避免加载/使用冒险)。

总结

这次的实验让我对CPU的取指过程有了更深了解,自己尝试写汇编代码,刚开始也是会报很多错,然后又回去看代码,最后成功解决,锻炼了肉眼debug能力哈哈