Add widen floating-point matrix multiplication instructions for mixed…

… precision; Add move instruction between scalar registers and matrix registers for debug purposes; Change mrelease to initial ms status/add mzero for security issues; Remove streaming memory access instruction, compatible with Zihintntl extension instead of customizing hint operations in extensions; Support new data types int4/bf16; Add RLEN, modify MLEN definition for a clearer programming model; Reorganize the matrix CSR to be more accurate and meet the RISC-V standards; Modify arithmetic instructions opcode, more consistent with RISC-V coding habits; New intrinsic style with function overloading
XUANTIE-RV · Jun 2, 2023 · 481224d · 481224d
1 parent 4b52908
commit 481224d
Show file tree

Hide file tree

Showing 8 changed files with 604 additions and 503 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -1,18 +1,18 @@
 [submodule "shl"]
 	path = shl
-	url = git@github.com:T-head-Semi/csi-nn2.git
-	branch = matrix-extension
+	url = https://github.com/T-head-Semi/csi-nn2.git
+	branch =  matrix-extension
+[submodule "spec/docs-resources"]
+	path = doc/docs-resources
+	url = https://github.com/riscv/docs-resources.git
 [submodule "hhb"]
 	path = hhb
-	url = git@github.com:T-head-Semi/tvm.git
-	branch = matrix-extension
+	url = https://github.com/T-head-Semi/tvm.git
+	branch =  matrix-extension
 [submodule "qemu"]
 	path = qemu
 	url = git@github.com:T-head-Semi/qemu.git
-	branch = thead-open-matrix-v0.2
-[submodule "doc/docs-resources"]
-	path = doc/docs-resources
-	url = git@github.com:riscv/docs-resources.git
+	branch = thead-open-matrix-v0.3
 [submodule "xuantie-gnu-toolchain"]
 	path = xuantie-gnu-toolchain
 	url = git@github.com:T-head-Semi/xuantie-gnu-toolchain.git

diff --git a/README.md b/README.md
@@ -8,12 +8,12 @@ This is a matrix extension proposal for AI applications under RISC-V architectur
     - Peak performance of the extension varies from 0.125 Tops/Ghz to 32 Tops/Ghz
     - Binary portability
 * Multiple data types
-    - Support int8/int16/fp16/fp32
+    - Support int4/int8/int16/fp16/bf16/fp32
 * Independence
     - Strongly inspired by the RISC-V Vector extension
     - Decoupled architecture from Vector extension
 * Extensibility for future
-    - Support extensions for bf16/int4 and other future extensions
+    - Support extensions for fp8/fp4 and other future extensions
 
 The extension is still under construction, and this is a preview demo project.
 Some key directories are shown below.
@@ -25,10 +25,10 @@ Some key directories are shown below.
     |--intrinsic            ## The Matrix Extension intrinsic API Reference Manual
 |--shl/                     ## A neural networks library using RISC-V Matrix Extension
 |--hhb/                     ## A toolkit used for deploying neural network models
+|--qemu/                    ## Emulator
 |--xuantie-gnu-toolchain/   ## GNU toolchain
     |--riscv-gcc/           ## Compiler
     |--riscv-binutils-gdb/  ## Assembler
-|--qemu/                    ## Emulator
 |--demos/               
     |--resnet50             ## A resnet50 evaluation demo using nn library
     |--GEMM                 ## A GEMM evaluation demo using intrinsic
@@ -60,8 +60,6 @@ For more information on AsciiDoctor, specification guidelines, or building local
 RISC-V Matrix Extension Specification is kept in ./spec.
 User guide and reference manual for RISC-V Matrix Extension tools are kept in ./doc.
 
-We will also release our latest documentation in the Releases.
-
 The final documents form of PDF can be generated using the `make` command under corresponding folder. The generation method of each document is as follows.
 
 | Folder | Command     |     Documents |
@@ -89,9 +87,9 @@ make
 Get your own case and compile into matrix.elf. Both intrinsic and nn libraries can be used to perform this step.
 Please refer to [T-HEAD GNU Compiler Toolchain](https://github.com/T-head-Semi/xuantie-gnu-toolchain) or [HHB](https://www.yuque.com/za4k4z/kvkcoh/sxltga) and [SHL](https://github.com/T-head-Semi/csi-nn2) for details.
 
-Evaluation matrix performance on qemu with RISC-V Matrix Extension(with vector length set to VLEN and matrix length set to MLEN)
+Evaluation matrix performance on qemu with RISC-V Matrix Extension(with vector length set to VLEN and matrix length set to RLEN)
 ```
-qemu-riscv64 -cpu rv64,x-v=true,vext_spec=v1.0,vlen=VLEN,x-matrix=on,mlen=MLEN ./matrix.elf
+qemu-riscv64 -cpu rv64,x-v=true,vext_spec=v1.0,vlen=VLEN,x-matrix=on,rlen=RLEN ./matrix.elf
 ```
 
 

diff --git a/demos/run.sh b/demos/run.sh
@@ -13,7 +13,7 @@ run() {
     module=$2
 
     echo ${module}" is running"
-    ${qemu} -CPF -cpu rv64,x-v=true,vext_spec=v1.0,vlen=128,x-matrix=on,mlen=128 ./${dir_name}/${module}.elf | tee -a log & sleep 5
+    ${qemu} -CPF -cpu rv64,x-v=true,vext_spec=v1.0,vlen=128,x-matrix=on,rlen=128 ./${dir_name}/${module}.elf | tee -a log & sleep 5
     ${cpf64} record -e ${dir_name}/${module}.elf
     ${cpf64} stat   -e ${dir_name}/${module}.elf --dump-inst >perf_data/${module}.log
     ${cpf64} report -g >/dev/null
@@ -26,7 +26,7 @@ run gemm gemm_fp16
 run resnet50 resnet50_int8
 run resnet50 resnet50_fp16
 run intrinsic_matmul matmul
-# run gemm gemm_int8_rvv
-# run gemm gemm_fp16_rvv
-# run resnet50 resnet50_int8_rvv
-# run resnet50 resnet50_fp16_rvv
+run gemm gemm_int8_rvv
+run gemm gemm_fp16_rvv
+run resnet50 resnet50_int8_rvv
+run resnet50 resnet50_fp16_rvv
diff --git a/demos/toolchain/xuantie-qemu-x86_64-Ubuntu-18.04.tar.gz b/demos/toolchain/xuantie-qemu-x86_64-Ubuntu-18.04.tar.gz