diff --git a/.gitignore b/.gitignore
index 27dca716..fce02145 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,9 @@
*.asm
*.s
-*.o
-/obj
+*.o
+*.so
+obj
*.obj
*.dll
diff --git a/lto.c b/lto.c
new file mode 100644
index 00000000..4765a53d
--- /dev/null
+++ b/lto.c
@@ -0,0 +1,39 @@
+/******************************************************************************\
+* Project: Primitive LTO Merger Substitute *
+* Authors: Iconoclast *
+* Release: 2018.03.17 *
+* License: CC0 Public Domain Dedication *
+* *
+* To the extent possible under law, the author(s) have dedicated all copyright *
+* and related and neighboring rights to this software to the public domain *
+* worldwide. This software is distributed without any warranty. *
+* *
+* You should have received a copy of the CC0 Public Domain Dedication along *
+* with this software. *
+* If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. *
+\******************************************************************************/
+
+/*
+ * A single compile-and-link command will be sufficient with this method.
+ *
+ * A command exemplifying this on UNIX with all optimizations intact may be:
+ * $ cc --shared -o rsp.so lto.c -O3 -msse2 -DARCH_MIN_SSE2 -s
+ *
+ * To control the link-time stage during build with a separate command:
+ * $ gcc -c -o rsp.o lto.c -O3 -msse2 -DARCH_MIN_SSE2
+ * $ ld --shared -o rsp.so -lc rsp.o --strip-all
+ */
+
+#include "module.c"
+#include "su.c"
+
+#include "vu/vu.c"
+
+#include "vu/multiply.c"
+#include "vu/add.c"
+#include "vu/select.c"
+#include "vu/logical.c"
+#include "vu/divide.c"
+#if 0
+#include "vu/pack.c"
+#endif
diff --git a/make.sh b/make.sh
index 77ff4a78..10040dc1 100755
--- a/make.sh
+++ b/make.sh
@@ -1,8 +1,6 @@
mkdir -p obj
mkdir -p obj/vu
-# The below path configuration will only work if you have this `make.sh` script
-# installed to the parent directory just outside the RSP source when you run it.
src="." # or an absolute path, like "/home/user/rsp"
obj="$src/obj"
@@ -16,18 +14,10 @@ OBJ_LIST="\
$obj/vu/logical.o \
$obj/vu/divide.o"
-FLAGS_ANSI="\
- -O3 \
- -fPIC \
- -DPLUGIN_API_VERSION=0x0101 \
- -march=native \
- -mstackrealign \
- -Wall \
- -pedanticz"
+FLAGS_ANSI="-fPIC -DPLUGIN_API_VERSION=0x0101 -mstackrealign -Wall -pedantic"
if [ `uname -m` == 'x86_64' ]; then
FLAGS_x86="\
- -O3 \
-masm=intel \
-fPIC \
-DPLUGIN_API_VERSION=0x0101 \
@@ -38,11 +28,10 @@ FLAGS_x86="\
-pedantic \
-Wall -Wshadow -Wredundant-decls -Wextra -Wcast-align -Wcast-qual \
-Wdisabled-optimization -Wformat=2 -Winit-self -Wlogical-op
- -Wmissing-include-dirs -Wstrict-overflow=5 -Wundef -Wno-unused \
+ -Wmissing-include-dirs -Wstrict-overflow=1 -Wundef -Wno-unused \
-Wno-variadic-macros -Wno-parentheses -fdiagnostics-show-option"
else
FLAGS_x86="\
- -O3 \
-masm=intel \
-DPLUGIN_API_VERSION=0x0101 \
-DARCH_MIN_SSE2 \
@@ -52,25 +41,25 @@ FLAGS_x86="\
-pedantic \
-Wall -Wshadow -Wredundant-decls -Wextra -Wcast-align -Wcast-qual \
-Wdisabled-optimization -Wformat=2 -Winit-self -Wlogical-op
- -Wmissing-include-dirs -Wstrict-overflow=5 -Wundef -Wno-unused \
+ -Wmissing-include-dirs -Wstrict-overflow=1 -Wundef -Wno-unused \
-Wno-variadic-macros -Wno-parentheses -fdiagnostics-show-option"
fi
C_FLAGS=$FLAGS_x86 # default since Intel SIMD was the most tested
echo Compiling C source code...
-cc -S $C_FLAGS -o $obj/module.s $src/module.c
-cc -S $C_FLAGS -o $obj/su.s $src/su.c
-cc -S $C_FLAGS -o $obj/vu/vu.s $src/vu/vu.c
-cc -S $C_FLAGS -o $obj/vu/multiply.s $src/vu/multiply.c
-cc -S $C_FLAGS -o $obj/vu/add.s $src/vu/add.c
-cc -S $C_FLAGS -o $obj/vu/select.s $src/vu/select.c
-cc -S $C_FLAGS -o $obj/vu/logical.s $src/vu/logical.c
-cc -S $C_FLAGS -o $obj/vu/divide.s $src/vu/divide.c
+cc -S -Os $C_FLAGS -o $obj/module.s $src/module.c
+cc -S -O3 $C_FLAGS -o $obj/su.s $src/su.c
+cc -S -O3 $C_FLAGS -o $obj/vu/vu.s $src/vu/vu.c
+cc -S -O3 $C_FLAGS -o $obj/vu/multiply.s $src/vu/multiply.c
+cc -S -O3 $C_FLAGS -o $obj/vu/add.s $src/vu/add.c
+cc -S -O3 $C_FLAGS -o $obj/vu/select.s $src/vu/select.c
+cc -S -O3 $C_FLAGS -o $obj/vu/logical.s $src/vu/logical.c
+cc -S -O2 $C_FLAGS -o $obj/vu/divide.s $src/vu/divide.c
echo Assembling compiled sources...
-as --statistics -o $obj/module.o $obj/module.s
-as --statistics -o $obj/su.o $obj/su.s
-as --statistics -o $obj/vu/vu.o $obj/vu/vu.s
+as -o $obj/module.o $obj/module.s
+as -o $obj/su.o $obj/su.s
+as -o $obj/vu/vu.o $obj/vu/vu.s
as -o $obj/vu/multiply.o $obj/vu/multiply.s
as -o $obj/vu/add.o $obj/vu/add.s
as -o $obj/vu/select.o $obj/vu/select.s
@@ -78,5 +67,5 @@ as -o $obj/vu/logical.o $obj/vu/logical.s
as -o $obj/vu/divide.o $obj/vu/divide.s
echo Linking assembled object files...
-ld --shared -o $obj/rspdebug.so $OBJ_LIST
-strip -o $obj/rsp.so $obj/rspdebug.so
+ld --shared -o $obj/rspdebug.so -lc $OBJ_LIST
+strip -o $obj/rsp.so $obj/rspdebug.so --strip-all
diff --git a/make_w32.cmd b/make_w32.cmd
index 463f2066..ab9b4c12 100644
--- a/make_w32.cmd
+++ b/make_w32.cmd
@@ -1,7 +1,18 @@
@ECHO OFF
TITLE MinGW Compiler Suite Invocation
+REM If you have MinGW on a different drive letter or installed at a custom path
+REM (or just not yet installed at all), this build script may not work out of
+REM the box for most Windows users. Alternatives include MinGW-w32 or trying
+REM to execute the Unix shell script "make.sh" from Windows 10+ or Git Bash.
+
+REM The following line is the only one you should ever need to change.
set MinGW=C:\MinGW
+
+set lib=%MinGW%\lib
+set bin=%MinGW%\bin
+set inc=%MinGW%\include
+
REM set rsp=%USERPROFILE%\rsp
set rsp=%CD%
set obj=%rsp%\obj
@@ -16,20 +27,16 @@ set OBJ_LIST=^
%obj%\vu\logical.o ^
%obj%\vu\divide.o
-set FLAGS_ANSI=-O3^
+set FLAGS_ANSI=-Wall -pedantic^
-DPLUGIN_API_VERSION=0x0101^
- -march=native^
-mstackrealign^
- -Wall^
- -pedantic
-set FLAGS_x86=-O3^
+ -march=native
+set FLAGS_x86=-Wall -pedantic^
-masm=intel^
-DPLUGIN_API_VERSION=0x0101^
-DARCH_MIN_SSE2^
- -march=native^
-mstackrealign^
- -Wall^
- -pedantic
+ -march=native
set C_FLAGS=%FLAGS_x86%
if not exist obj (
@@ -37,31 +44,33 @@ mkdir obj
cd obj
mkdir vu
)
-cd %MinGW%\bin
+cd /D %bin%
ECHO Compiling C source code...
-cc -S %C_FLAGS% -o %obj%\module.asm %rsp%\module.c
-cc -S %C_FLAGS% -o %obj%\su.asm %rsp%\su.c
-cc -S %C_FLAGS% -o %obj%\vu\vu.asm %rsp%\vu\vu.c
-cc -S %C_FLAGS% -o %obj%\vu\multiply.asm %rsp%\vu\multiply.c
-cc -S %C_FLAGS% -o %obj%\vu\add.asm %rsp%\vu\add.c
-cc -S %C_FLAGS% -o %obj%\vu\select.asm %rsp%\vu\select.c
-cc -S %C_FLAGS% -o %obj%\vu\logical.asm %rsp%\vu\logical.c
-cc -S %C_FLAGS% -o %obj%\vu\divide.asm %rsp%\vu\divide.c
+@ECHO ON
+gcc -Os -S %C_FLAGS% -o %obj%\module.asm %rsp%\module.c
+gcc -O3 -S %C_FLAGS% -o %obj%\su.asm %rsp%\su.c
+gcc -O3 -S %C_FLAGS% -o %obj%\vu\vu.asm %rsp%\vu\vu.c
+gcc -O3 -S %C_FLAGS% -o %obj%\vu\multiply.asm %rsp%\vu\multiply.c
+gcc -O3 -S %C_FLAGS% -o %obj%\vu\add.asm %rsp%\vu\add.c
+gcc -O3 -S %C_FLAGS% -o %obj%\vu\select.asm %rsp%\vu\select.c
+gcc -O3 -S %C_FLAGS% -o %obj%\vu\logical.asm %rsp%\vu\logical.c
+gcc -O2 -S %C_FLAGS% -o %obj%\vu\divide.asm %rsp%\vu\divide.c
+@ECHO OFF
ECHO.
ECHO Assembling compiled sources...
-as --statistics -o %obj%\module.o %obj%\module.asm
-as --statistics -o %obj%\su.o %obj%\su.asm
-as --statistics -o %obj%\vu\vu.o %obj%\vu\vu.asm
-as -o %obj%\vu\multiply.o %obj%\vu\multiply.asm
-as -o %obj%\vu\add.o %obj%\vu\add.asm
-as -o %obj%\vu\select.o %obj%\vu\select.asm
-as -o %obj%\vu\logical.o %obj%\vu\logical.asm
-as -o %obj%\vu\divide.o %obj%\vu\divide.asm
+as -o %obj%\module.o %obj%\module.asm
+as -o %obj%\su.o %obj%\su.asm
+as -o %obj%\vu\vu.o %obj%\vu\vu.asm
+as -o %obj%\vu\multiply.o %obj%\vu\multiply.asm
+as -o %obj%\vu\add.o %obj%\vu\add.asm
+as -o %obj%\vu\select.o %obj%\vu\select.asm
+as -o %obj%\vu\logical.o %obj%\vu\logical.asm
+as -o %obj%\vu\divide.o %obj%\vu\divide.asm
ECHO.
ECHO Linking assembled object files...
-ld --shared -e _DllMain@12 -o %obj%\rspdebug.dll %OBJ_LIST% %MinGW%\lib\libkernel32.a
-strip -o %obj%/rsp.dll %obj%/rspdebug.dll
+ld --shared -e _DllMain@12 -o %obj%\rspdebug.dll -L %lib% %OBJ_LIST% -lmsvcrt
+strip -o %obj%\rsp.dll %obj%\rspdebug.dll --strip-all
PAUSE
diff --git a/make_w64.cmd b/make_w64.cmd
index 11ff4a03..45cf7e7d 100644
--- a/make_w64.cmd
+++ b/make_w64.cmd
@@ -1,8 +1,22 @@
@ECHO OFF
TITLE MinGW Compiler Suite Invocation
-set version=x86_64-5.1.0-win32-seh-rt_v4-rev0
-set MinGW="C:\Program Files\mingw-w64\%version%\mingw64"
+REM If you have installed MinGW-w64 without using MSYS2 to obtain the package
+REM (or just not yet installed at all), this build script may not work out of
+REM the box for most Windows users. If you have Cygwin instead or whatever
+REM else, be sure to adjust the path below, or execute "make.sh" in a Git shell.
+
+REM The following line is the only one you should ever need to change.
+set mingw64=C:\msys64\mingw64
+
+REM The following two variables are irrelevant, unless you set a 32-bit target.
+set mingw32=%mingw64%\..\mingw32
+set lib=%mingw32%\i686-w64-mingw32\lib
+
+set lib64=%mingw64%\x86_64-w64-mingw32\lib
+set bin=%mingw64%\bin
+set inc=%lib64%\..\include
+
REM set rsp=%USERPROFILE%\rsp
set rsp=%CD%
set obj=%rsp%\obj
@@ -15,21 +29,18 @@ set OBJ_LIST=^
%obj%\vu\add.o ^
%obj%\vu\select.o ^
%obj%\vu\logical.o ^
-%obj%\vu\divide.o ^
-%MinGW%\x86_64-w64-mingw32\lib\libkernel32.a
+%obj%\vu\divide.o
-set FLAGS_ANSI=-Wall^
+set FLAGS_ANSI=-Wall -pedantic^
-DPLUGIN_API_VERSION=0x0101^
- -march=native^
-mstackrealign^
- -pedantic
-set FLAGS_x86=-Wall^
- -masm=intel^
+ -march=native
+set FLAGS_x86=-Wall -pedantic^
-DPLUGIN_API_VERSION=0x0101^
-DARCH_MIN_SSE2^
- -march=native^
+ -masm=intel^
-mstackrealign^
- -pedantic
+ -march=native
set C_FLAGS=%FLAGS_x86%
if not exist obj (
@@ -37,31 +48,33 @@ mkdir obj
cd obj
mkdir vu
)
-cd %MinGW%\bin
+cd /D %bin%
ECHO Compiling C source code...
-%MinGW%\bin\gcc.exe -S -Os %C_FLAGS% -o %obj%\module.asm %rsp%\module.c
-%MinGW%\bin\gcc.exe -S -O3 %C_FLAGS% -o %obj%\su.asm %rsp%\su.c
-%MinGW%\bin\gcc.exe -S -O3 %C_FLAGS% -o %obj%\vu\vu.asm %rsp%\vu\vu.c
-%MinGW%\bin\gcc.exe -S -O3 %C_FLAGS% -o %obj%\vu\multiply.asm %rsp%\vu\multiply.c
-%MinGW%\bin\gcc.exe -S -O3 %C_FLAGS% -o %obj%\vu\add.asm %rsp%\vu\add.c
-%MinGW%\bin\gcc.exe -S -O3 %C_FLAGS% -o %obj%\vu\select.asm %rsp%\vu\select.c
-%MinGW%\bin\gcc.exe -S -O3 %C_FLAGS% -o %obj%\vu\logical.asm %rsp%\vu\logical.c
-%MinGW%\bin\gcc.exe -S -O3 %C_FLAGS% -o %obj%\vu\divide.asm %rsp%\vu\divide.c
+@ECHO ON
+gcc -S -Os %C_FLAGS% -o %obj%\module.asm %rsp%\module.c
+gcc -S -O3 %C_FLAGS% -o %obj%\su.asm %rsp%\su.c
+gcc -S -O3 %C_FLAGS% -o %obj%\vu\vu.asm %rsp%\vu\vu.c
+gcc -S -O3 %C_FLAGS% -o %obj%\vu\multiply.asm %rsp%\vu\multiply.c
+gcc -S -O3 %C_FLAGS% -o %obj%\vu\add.asm %rsp%\vu\add.c
+gcc -S -O3 %C_FLAGS% -o %obj%\vu\select.asm %rsp%\vu\select.c
+gcc -S -O3 %C_FLAGS% -o %obj%\vu\logical.asm %rsp%\vu\logical.c
+gcc -S -O2 %C_FLAGS% -o %obj%\vu\divide.asm %rsp%\vu\divide.c
+@ECHO OFF
ECHO.
ECHO Assembling compiled sources...
-%MinGW%\bin\as.exe -o %obj%\module.o %obj%\module.asm
-%MinGW%\bin\as.exe -o %obj%\su.o %obj%\su.asm
-%MinGW%\bin\as.exe -o %obj%\vu\vu.o %obj%\vu\vu.asm
-%MinGW%\bin\as.exe -o %obj%\vu\multiply.o %obj%\vu\multiply.asm
-%MinGW%\bin\as.exe -o %obj%\vu\add.o %obj%\vu\add.asm
-%MinGW%\bin\as.exe -o %obj%\vu\select.o %obj%\vu\select.asm
-%MinGW%\bin\as.exe -o %obj%\vu\logical.o %obj%\vu\logical.asm
-%MinGW%\bin\as.exe -o %obj%\vu\divide.o %obj%\vu\divide.asm
+as -o %obj%\module.o %obj%\module.asm
+as -o %obj%\su.o %obj%\su.asm
+as -o %obj%\vu\vu.o %obj%\vu\vu.asm
+as -o %obj%\vu\multiply.o %obj%\vu\multiply.asm
+as -o %obj%\vu\add.o %obj%\vu\add.asm
+as -o %obj%\vu\select.o %obj%\vu\select.asm
+as -o %obj%\vu\logical.o %obj%\vu\logical.asm
+as -o %obj%\vu\divide.o %obj%\vu\divide.asm
ECHO.
ECHO Linking assembled object files...
-%MinGW%\bin\ld.exe --shared -e DllMain -o %obj%\rspdebug.dll %OBJ_LIST%
-%MinGW%\bin\strip.exe -o %obj%/rsp.dll %obj%/rspdebug.dll
+ld --shared -e DllMain -o %obj%\rspdebug.dll -L%lib64% %OBJ_LIST% -lmsvcrt
+strip -o %obj%\rsp.dll %obj%\rspdebug.dll --strip-all
PAUSE
diff --git a/module.c b/module.c
index 77d6eb33..5ac1ea5c 100644
--- a/module.c
+++ b/module.c
@@ -1,7 +1,7 @@
/******************************************************************************\
* Project: Module Subsystem Interface to SP Interpreter Core *
* Authors: Iconoclast *
-* Release: 2016.11.05 *
+* Release: 2018.03.21 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
@@ -28,6 +28,20 @@
#include "module.h"
#include "su.h"
+#include <setjmp.h>
+#include <signal.h>
+
+static jmp_buf CPU_state;
+static void seg_av_handler(int signal_code)
+{
+ longjmp(CPU_state, signal_code);
+}
+static void ISA_op_illegal(int signal_code)
+{
+ message("Plugin built for SIMD extensions this CPU does not support!");
+ raise(signal_code); /* e.g., rsp.dll built with -mssse3; the CPU is SSE2. */
+}
+
RSP_INFO RSP_INFO_NAME;
#define RSP_CXD4_VERSION 0x0101
@@ -60,7 +74,7 @@ ptr_CoreDoCommand CoreDoCommand = NULL;
NOINLINE void update_conf(const char* source)
{
- memset(conf, 0, sizeof(conf));
+ memset(conf, 0, 32);
m64p_rom_header ROM_HEADER;
CoreDoCommand(M64CMD_ROM_GET_HEADER, sizeof(ROM_HEADER), &ROM_HEADER);
@@ -252,7 +266,7 @@ EXPORT void CALL DllAbout(p_void hParent)
EXPORT void CALL DllConfig(p_void hParent)
{
- my_system("sp_cfgui");
+ system("sp_cfgui");
update_conf(CFG_FILE);
if (DMEM == IMEM || GET_RCP_REG(SP_PC_REG) % 4096 == 0x00000000)
@@ -269,27 +283,28 @@ EXPORT void CALL DllConfig(p_void hParent)
EXPORT unsigned int CALL DoRspCycles(unsigned int cycles)
{
+ static char task_debug[] = "unknown task type: 0x????????";
+ char* task_debug_type;
OSTask_type task_type;
register unsigned int i;
- if (GET_RCP_REG(SP_STATUS_REG) & 0x00000003)
- {
+ if (GET_RCP_REG(SP_STATUS_REG) & 0x00000003) {
message("SP_STATUS_HALT");
return 0x00000000;
}
+ task_debug_type = &task_debug[strlen("unknown task type: 0x")];
- task_type = 0x00000000
#ifdef USE_CLIENT_ENDIAN
- | *((pi32)(DMEM + 0x000FC0U))
+ memcpy(&task_type, DMEM + 0xFC0, 4);
#else
- | (u32)DMEM[0xFC0] << 24
- | (u32)DMEM[0xFC1] << 16
- | (u32)DMEM[0xFC2] << 8
- | (u32)DMEM[0xFC3] << 0
-#endif
+ task_type = 0x00000000
+ | (u32)(DMEM[0xFC0 ^ 0] & 0xFFu) << 24
+ | (u32)(DMEM[0xFC1 ^ 0] & 0xFFu) << 16
+ | (u32)(DMEM[0xFC2 ^ 0] & 0xFFu) << 8
+ | (u32)(DMEM[0xFC3 ^ 0] & 0xFFu) << 0
;
+#endif
switch (task_type) {
-#ifdef EXTERN_COMMAND_LIST_GBI
case M_GFXTASK:
if (CFG_HLE_GFX == 0)
break;
@@ -299,10 +314,17 @@ EXPORT unsigned int CALL DoRspCycles(unsigned int cycles)
GET_RCP_REG(SP_STATUS_REG) |=
SP_STATUS_SIG2 | SP_STATUS_BROKE | SP_STATUS_HALT
;
+#if defined(M64P_PLUGIN_API)
if (GET_RSP_INFO(ProcessDlistList) == NULL)
{ /* branch */ }
else
GET_RSP_INFO(ProcessDlistList)();
+#else
+ if (GET_RSP_INFO(ProcessDList) == NULL)
+ { /* branch */ }
+ else
+ GET_RSP_INFO(ProcessDList)();
+#endif
if ((GET_RCP_REG(SP_STATUS_REG) & SP_STATUS_INTR_BREAK) && (GET_RCP_REG(SP_STATUS_REG) & (SP_STATUS_SIG2 | SP_STATUS_BROKE | SP_STATUS_HALT))) {
GET_RCP_REG(MI_INTR_REG) |= 0x00000001;
@@ -310,16 +332,21 @@ EXPORT unsigned int CALL DoRspCycles(unsigned int cycles)
}
GET_RCP_REG(DPC_STATUS_REG) &= ~0x00000002ul; /* DPC_STATUS_FREEZE */
return 0;
-#endif
-#ifdef EXTERN_COMMAND_LIST_ABI
case M_AUDTASK:
if (CFG_HLE_AUD == 0)
break;
+#if defined(M64P_PLUGIN_API)
if (GET_RSP_INFO(ProcessAlistList) == NULL)
{ /* branch */ }
else
GET_RSP_INFO(ProcessAlistList)();
+#else
+ if (GET_RSP_INFO(ProcessAList) == NULL)
+ { /* branch */ }
+ else
+ GET_RSP_INFO(ProcessAList)();
+#endif
GET_RCP_REG(SP_STATUS_REG) |=
SP_STATUS_SIG2 | SP_STATUS_BROKE | SP_STATUS_HALT
@@ -329,7 +356,6 @@ EXPORT unsigned int CALL DoRspCycles(unsigned int cycles)
GET_RSP_INFO(CheckInterrupts)();
}
return 0;
-#endif
case M_VIDTASK:
message("M_VIDTASK");
break;
@@ -346,10 +372,15 @@ EXPORT unsigned int CALL DoRspCycles(unsigned int cycles)
break;
GET_RSP_INFO(ShowCFB)(); /* forced FB refresh in case gfx plugin skip */
break;
+ default:
+ if (task_type == 0x8BC43B5D)
+ break; /* CIC boot code sent to the RSP */
+ sprintf(task_debug_type, "%08lX", (unsigned long)task_type);
+ message(task_debug);
}
#ifdef WAIT_FOR_CPU_HOST
- for (i = 0; i < 32; i++)
+ for (i = 0; i < NUMBER_OF_SCALAR_REGISTERS; i++)
MFC0_count[i] = 0;
#endif
run_task();
@@ -387,7 +418,7 @@ EXPORT void CALL GetDllInfo(PLUGIN_INFO *PluginInfo)
{
PluginInfo -> Version = PLUGIN_API_VERSION;
PluginInfo -> Type = PLUGIN_TYPE_RSP;
- my_strcpy(PluginInfo -> Name, "Static Interpreter");
+ strcpy(PluginInfo -> Name, "Static Interpreter");
PluginInfo -> NormalMemory = 0;
PluginInfo -> MemoryBswaped = USE_CLIENT_ENDIAN;
return;
@@ -406,6 +437,8 @@ void no_LLE(void)
}
EXPORT void CALL InitiateRSP(RSP_INFO Rsp_Info, pu32 CycleCount)
{
+ int recovered_from_exception;
+
if (CycleCount != NULL) /* cycle-accuracy not doable with today's hosts */
*CycleCount = 0;
update_conf(CFG_FILE);
@@ -425,7 +458,7 @@ EXPORT void CALL InitiateRSP(RSP_INFO Rsp_Info, pu32 CycleCount)
CR[0x5] = &GET_RCP_REG(SP_DMA_FULL_REG);
CR[0x6] = &GET_RCP_REG(SP_DMA_BUSY_REG);
CR[0x7] = &GET_RCP_REG(SP_SEMAPHORE_REG);
- GET_RCP_REG(SP_PC_REG) = 0x04001000;
+ *(RSP_INFO_NAME.SP_PC_REG) = 0x04001000;
CR[0x8] = &GET_RCP_REG(DPC_START_REG);
CR[0x9] = &GET_RCP_REG(DPC_END_REG);
CR[0xA] = &GET_RCP_REG(DPC_CURRENT_REG);
@@ -443,6 +476,28 @@ EXPORT void CALL InitiateRSP(RSP_INFO Rsp_Info, pu32 CycleCount)
GBI_phase = GET_RSP_INFO(ProcessRdpList);
if (GBI_phase == NULL)
GBI_phase = no_LLE;
+
+ signal(SIGILL, ISA_op_illegal);
+#ifndef _WIN32
+ signal(SIGSEGV, seg_av_handler);
+ for (SR[ra] = 0; SR[ra] < 0x80000000ul; SR[ra] += 0x200000) {
+ recovered_from_exception = setjmp(CPU_state);
+ if (recovered_from_exception)
+ break;
+ SR[at] += DRAM[SR[ra]];
+ }
+ for (SR[at] = 0; SR[at] < 31; SR[at]++) {
+ SR[ra] = (SR[ra] & ~1) >> 1;
+ if (SR[ra] == 0)
+ break;
+ }
+ su_max_address = (1 << SR[at]) - 1;
+#endif
+
+ if (su_max_address < 0x1FFFFFul)
+ su_max_address = 0x1FFFFFul; /* 2 MiB */
+ if (su_max_address > 0xFFFFFFul)
+ su_max_address = 0xFFFFFFul; /* 16 MiB */
return;
}
@@ -455,9 +510,9 @@ EXPORT void CALL RomClosed(void)
* If the config file wasn't installed correctly, politely shut errors up.
*/
#if !defined(M64P_PLUGIN_API)
- FILE* stream = my_fopen(CFG_FILE, "wb");
- my_fwrite(conf, 8, 32 / 8, stream);
- my_fclose(stream);
+ FILE* stream = fopen(CFG_FILE, "wb");
+ fwrite(conf, 8, 32 / 8, stream);
+ fclose(stream);
#endif
return;
}
@@ -470,22 +525,22 @@ NOINLINE void message(const char* body)
char* argv;
int i, j;
- argv = my_calloc(my_strlen(body) + 64, 1);
- my_strcpy(argv, "CMD /Q /D /C \"TITLE RSP Message&&ECHO ");
+ argv = calloc(strlen(body) + 64, 1);
+ strcpy(argv, "CMD /Q /D /C \"TITLE RSP Message&&ECHO ");
i = 0;
- j = my_strlen(argv);
+ j = strlen(argv);
while (body[i] != '\0') {
if (body[i] == '\n') {
- my_strcat(argv, "&&ECHO ");
+ strcat(argv, "&&ECHO ");
++i;
j += 7;
continue;
}
argv[j++] = body[i++];
}
- my_strcat(argv, "&&PAUSE&&EXIT\"");
- my_system(argv);
- my_free(argv);
+ strcat(argv, "&&PAUSE&&EXIT\"");
+ system(argv);
+ free(argv);
#else
fputs(body, stdout);
putchar('\n');
@@ -519,13 +574,13 @@ NOINLINE void update_conf(const char* source)
for (i = 0; i < 32; i++)
conf[i] = 0x00;
- stream = my_fopen(source, "rb");
+ stream = fopen(source, "rb");
if (stream == NULL) {
message("Failed to read config.");
return;
}
- my_fread(conf, 8, 32 / 8, stream);
- my_fclose(stream);
+ fread(conf, 8, 32 / 8, stream);
+ fclose(stream);
return;
}
#endif
@@ -548,11 +603,11 @@ void step_SP_commands(uint32_t inst)
sprintf(&offset[0], "%03X", GET_RCP_REG(SP_PC_REG) & 0xFFF);
sprintf(&code[0], "%08X", inst);
strcpy(text, offset);
- my_strcat(text, "\n");
- my_strcat(text, code);
+ strcat(text, "\n");
+ strcat(text, code);
message(text); /* PC offset, MIPS hex. */
if (output_log != NULL)
- my_fwrite(endian_swap, 4, 1, output_log);
+ fwrite(endian_swap, 4, 1, output_log);
}
#endif
@@ -563,13 +618,13 @@ NOINLINE void export_data_cache(void)
register int i;
/* const int little_endian = GET_RSP_INFO(MemoryBswaped); */
- DMEM_swapped = my_calloc(4096, 1);
+ DMEM_swapped = calloc(4096, 1);
for (i = 0; i < 4096; i++)
DMEM_swapped[i] = DMEM[BES(i)];
- out = my_fopen("rcpcache.dhex", "wb");
- my_fwrite(DMEM_swapped, 16, 4096 / 16, out);
- my_fclose(out);
- my_free(DMEM_swapped);
+ out = fopen("rcpcache.dhex", "wb");
+ fwrite(DMEM_swapped, 16, 4096 / 16, out);
+ fclose(out);
+ free(DMEM_swapped);
return;
}
NOINLINE void export_instruction_cache(void)
@@ -579,13 +634,13 @@ NOINLINE void export_instruction_cache(void)
register int i;
/* const int little_endian = GET_RSP_INFO(MemoryBswaped); */
- IMEM_swapped = my_calloc(4096, 1);
+ IMEM_swapped = calloc(4096, 1);
for (i = 0; i < 4096; i++)
IMEM_swapped[i] = IMEM[BES(i)];
- out = my_fopen("rcpcache.ihex", "wb");
- my_fwrite(IMEM_swapped, 16, 4096 / 16, out);
- my_fclose(out);
- my_free(IMEM_swapped);
+ out = fopen("rcpcache.ihex", "wb");
+ fwrite(IMEM_swapped, 16, 4096 / 16, out);
+ fclose(out);
+ free(IMEM_swapped);
return;
}
void export_SP_memory(void)
@@ -597,189 +652,26 @@ void export_SP_memory(void)
/*
* Microsoft linker defaults to an entry point of `_DllMainCRTStartup',
- * which attaches several CRT dependencies. To eliminate CRT dependencies,
- * we direct the linker to cursor the entry point to the lower-level
- * `DllMain' symbol or, alternatively, link with /NOENTRY for no entry point.
+ * which attaches several CRT dependencies. To eliminate linkage of unused
+ * startup CRT code, we direct the linker to use DllMain as the entry point.
+ *
+ * The same approach is taken with MinGW to get those weird MinGW-specific
+ * messages and unused initializer functions out of the plugin binary.
*/
-#ifdef WIN32
-BOOL WINAPI DllMain(
- HINSTANCE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
+#ifdef _WIN32
+BOOL WINAPI
+DllMain(HINSTANCE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
hModule = lpReserved = NULL; /* unused */
- switch (ul_reason_for_call)
- {
-case 1: /* DLL_PROCESS_ATTACH */
- break;
-case 2: /* DLL_THREAD_ATTACH */
- break;
-case 3: /* DLL_THREAD_DETACH */
- break;
-case 0: /* DLL_PROCESS_DETACH */
+ switch (ul_reason_for_call) {
+ case 1: /* DLL_PROCESS_ATTACH */
+ case 2: /* DLL_THREAD_ATTACH */
+ case 3: /* DLL_THREAD_DETACH */
+ case 0: /* DLL_PROCESS_DETACH */
break;
+ default:
+ message("Unknown reason for call.");
}
- return 1; /* TRUE */
-}
-#endif
-
-/*
- * low-level recreations of the C standard library functions for operating
- * systems that define a C run-time or dependency on top of fixed OS calls
- *
- * Currently, this only addresses Microsoft Windows.
- *
- * None of these are meant to out-perform the original functions, by the way
- * (especially with better intrinsic compiler support for stuff like memcpy),
- * just to cut down on I-cache use for performance-irrelevant code sections
- * and to avoid std. lib run-time dependencies on certain operating systems.
- */
-
-NOINLINE p_void my_calloc(size_t count, size_t size)
-{
-#ifdef WIN32
- return GlobalAlloc(GPTR, size * count);
-#else
- return calloc(count, size);
-#endif
-}
-
-NOINLINE void my_free(p_void ptr)
-{
-#ifdef WIN32
- while (GlobalFree(ptr) != NULL)
- message("GlobalFree() failure");
-#else
- free(ptr);
-#endif
- return;
-}
-
-NOINLINE size_t my_strlen(const char* str)
-{
- size_t ret_slot;
-
- for (ret_slot = 0; *str != '\0'; ret_slot++, str++)
- ;
- return (ret_slot);
-}
-
-NOINLINE char* my_strcpy(char* destination, const char* source)
-{
- register size_t i;
- const size_t length = my_strlen(source) + 1; /* including null terminator */
-
- for (i = 0; i < length; i++)
- destination[i] = source[i];
- return (destination);
-}
-
-NOINLINE char* my_strcat(char* destination, const char* source)
-{
- const size_t length = my_strlen(destination);
-
- my_strcpy(destination + length, source);
- return (destination);
-}
-
-NOINLINE int my_system(char* command)
-{
- int ret_slot;
-#ifdef WIN32
- static STARTUPINFOA info;
- static PROCESS_INFORMATION info_process;
-
- info.cb = sizeof(info);
- info.dwFillAttribute =
- FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY;
- info.dwFlags = STARTF_USEFILLATTRIBUTE | STARTF_USECOUNTCHARS;
-
- info.dwXCountChars = 80;
- info.dwYCountChars = 20;
-
- ret_slot = CreateProcessA(
- NULL,
- command,
- NULL,
- NULL,
- FALSE,
- 0x00000000,
- NULL,
- NULL,
- &info,
- &info_process
- );
-
- WaitForSingleObject(info_process.hProcess, INFINITE);
- CloseHandle(info_process.hProcess);
- CloseHandle(info_process.hThread);
-#elif TARGET_OS_IPHONE || TARGET_OS_TV
- // system not available in iOS
- ret_slot = 0;
-#else
- ret_slot = system(command);
-#endif
- return (ret_slot);
-}
-
-NOINLINE FILE* my_fopen(const char * filename, const char* mode)
-{
-#ifdef WIN32
-#if 0
- if (mode[1] != 'b')
- return NULL; /* non-binary yet to be supported? */
-#endif
- return (FILE *)(HANDLE)CreateFileA(
- filename,
- (mode[0] == 'r') ? GENERIC_READ : GENERIC_WRITE,
- (mode[0] == 'r') ? FILE_SHARE_READ : FILE_SHARE_WRITE,
- NULL,
- (mode[0] == 'r') ? OPEN_EXISTING : CREATE_ALWAYS,
-#if 0
- FILE_FLAG_WRITE_THROUGH | FILE_FLAG_OVERLAPPED | FILE_FLAG_NO_BUFFERING,
-#else
- (mode[0] == 'r') ? FILE_ATTRIBUTE_NORMAL : FILE_FLAG_WRITE_THROUGH,
-#endif
- NULL
- );
-#else
- return fopen(filename, mode);
-#endif
-}
-
-NOINLINE int my_fclose(FILE* stream)
-{
- int ret_slot;
-#ifdef WIN32
- ret_slot = !CloseHandle((HANDLE)stream);
-#else
- ret_slot = fclose(stream);
-#endif
- return (ret_slot);
-}
-
-NOINLINE size_t my_fread(p_void ptr, size_t size, size_t count, FILE* stream)
-{
-#ifdef WIN32
- DWORD ret_slot;
-
- ReadFile((HANDLE)stream, ptr, size * count, &ret_slot, NULL);
-#else
- size_t ret_slot;
-
- ret_slot = fread(ptr, size, count, stream);
-#endif
- return (size_t)(ret_slot);
-}
-
-NOINLINE size_t my_fwrite(p_void ptr, size_t size, size_t count, FILE* stream)
-{
-#ifdef WIN32
- DWORD ret_slot;
-
- WriteFile((HANDLE)stream, ptr, size * count, &ret_slot, NULL);
-#else
- size_t ret_slot;
-
- ret_slot = fwrite(ptr, size, count, stream);
-#endif
- return (size_t)(ret_slot);
+ return TRUE;
}
+#endif
\ No newline at end of file
diff --git a/module.h b/module.h
index 57711995..f889006b 100644
--- a/module.h
+++ b/module.h
@@ -1,7 +1,7 @@
/******************************************************************************\
* Project: Module Subsystem Interface to SP Interpreter Core *
* Authors: Iconoclast *
-* Release: 2016.11.05 *
+* Release: 2018.03.17 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
@@ -26,7 +26,9 @@ typedef enum {
M_NJPEGTASK = 4,
M_NULTASK = 5,
M_HVQTASK = 6,
- M_HVQMTASK = 7
+ M_HVQMTASK = 7,
+
+ NUM_KNOWN_TASK_TYPES
} OSTask_type;
#define CFG_FILE "rsp_conf.bin"
@@ -87,21 +89,4 @@ extern void step_SP_commands(u32 inst);
#endif
extern void export_SP_memory(void);
-/*
- * low-level recreations of the C standard library functions for operating
- * systems that provide an inconvenient C run-time ecosystem, like Windows
- */
-NOINLINE extern p_void my_calloc(size_t count, size_t size);
-NOINLINE extern void my_free(p_void ptr);
-NOINLINE extern size_t my_strlen(const char* str);
-NOINLINE extern char* my_strcpy(char* destination, const char* source);
-NOINLINE extern char* my_strcat(char* destination, const char* source);
-NOINLINE extern int my_system(char* command);
-NOINLINE extern FILE* my_fopen(const char * filename, const char* mode);
-NOINLINE extern int my_fclose(FILE* stream);
-NOINLINE extern size_t my_fread(
- p_void ptr, size_t size, size_t count, FILE* stream);
-NOINLINE extern size_t my_fwrite(
- p_void ptr, size_t size, size_t count, FILE* stream);
-
#endif
diff --git a/su.c b/su.c
index 97ef2618..7402c7bd 100644
--- a/su.c
+++ b/su.c
@@ -1,7 +1,7 @@
/******************************************************************************\
* Project: MSP Simulation Layer for Scalar Unit Operations *
* Authors: Iconoclast *
-* Release: 2016.11.05 *
+* Release: 2018.03.17 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
@@ -21,14 +21,18 @@
*/
#include "module.h"
+/* memcpy() and memset() in SP DMA */
+#include <string.h>
+
u32 inst_word;
-u32 SR[32];
+u32 SR[NUMBER_OF_SCALAR_REGISTERS];
typedef VECTOR_OPERATION(*p_vector_func)(v16, v16);
pu8 DRAM;
pu8 DMEM;
pu8 IMEM;
+unsigned long su_max_address = 0x007FFFFFul;
NOINLINE void res_S(void)
{
@@ -98,36 +102,41 @@ static void MT_SP_STATUS(unsigned int rt)
pu32 SP_STATUS_REG;
if (SR[rt] & 0xFE000040)
- message("MTC0\nSP_STATUS");
- MI_INTR_REG = GET_RSP_INFO(MI_INTR_REG);
+ message("MTC0\nSP_STATUS"); /* bits we don't know what to do with */
SP_STATUS_REG = GET_RSP_INFO(SP_STATUS_REG);
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000001) << 0);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00000002) << 0);
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000004) << 1);
- *MI_INTR_REG &= ~((SR[rt] & 0x00000008) >> 3); /* SP_CLR_INTR */
- *MI_INTR_REG |= ((SR[rt] & 0x00000010) >> 4); /* SP_SET_INTR */
- *SP_STATUS_REG |= (SR[rt] & 0x00000010) >> 4; /* int set halt */
+ /* DMA_BUSY, DMA_FULL, IO_FULL: No feature exists to clear these. */
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000020) << 5);
- /* *SP_STATUS_REG |= (!!(SR[rt] & 0x00000040) << 5); */
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000080) << 6);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00000100) << 6);
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000200) << 7);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00000400) << 7); /* yield request? */
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000800) << 8);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00001000) << 8); /* yielded? */
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00002000) << 9);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00004000) << 9); /* task done? */
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00008000) << 10);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00010000) << 10);
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00020000) << 11);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00040000) << 11);
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00080000) << 12);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00100000) << 12);
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00200000) << 13);
- *SP_STATUS_REG |= (!!(SR[rt] & 0x00400000) << 13);
*SP_STATUS_REG &= ~(!!(SR[rt] & 0x00800000) << 14);
+
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00000002) << 0);
+ /* No feature exists to set BROKE: (!!1 << 1) */
+ /* DMA_BUSY, DMA_FULL, IO_FULL: No feature exists to set these. */
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00000040) << 5);
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00000100) << 6);
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00000400) << 7); /* yield request? */
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00001000) << 8); /* yielded? */
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00004000) << 9); /* task done? */
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00010000) << 10);
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00040000) << 11);
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00100000) << 12);
+ *SP_STATUS_REG |= (!!(SR[rt] & 0x00400000) << 13);
*SP_STATUS_REG |= (!!(SR[rt] & 0x01000000) << 14);
+
+ MI_INTR_REG = GET_RSP_INFO(MI_INTR_REG);
+ *MI_INTR_REG &= ~((SR[rt] & 0x00000008) >> 3); /* SP_CLR_INTR */
+ *MI_INTR_REG |= ((SR[rt] & 0x00000010) >> 4); /* SP_SET_INTR */
+ *SP_STATUS_REG |= (SR[rt] & 0x00000010) >> 4; /* int set halt */
return;
}
static void MT_SP_RESERVED(unsigned int rt)
@@ -225,11 +234,12 @@ void SP_DMA_READ(void)
do {
offC = (count*length + *CR[0x0] + i) & 0x00001FF8ul;
offD = (count*skip + *CR[0x1] + i) & 0x00FFFFF8ul;
- *(pi64)(DMEM + offC) =
- *(pi64)(DRAM + offD)
- & (offD & ~MAX_DRAM_DMA_ADDR ? 0 : ~0) /* 0 if (addr > limit) */
- ;
i += 0x008;
+ if (offD > su_max_address) {
+ memset(DMEM + offC, 0x00, 8);
+ continue;
+ }
+ memcpy(DMEM + offC, DRAM + offD, 8);
} while (i < length);
} while (count);
@@ -264,8 +274,10 @@ void SP_DMA_WRITE(void)
do {
offC = (count*length + *CR[0x0] + i) & 0x00001FF8ul;
offD = (count*skip + *CR[0x1] + i) & 0x00FFFFF8ul;
- *(pi64)(DRAM + offD) = *(pi64)(DMEM + offC);
i += 0x000008;
+ if (offD > su_max_address)
+ continue;
+ memcpy(DRAM + offD, DMEM + offC, 8);
} while (i < length);
} while (count);
@@ -825,30 +837,12 @@ void SDV(unsigned vt, unsigned element, signed offset, unsigned base)
return;
}
-static char transfer_debug[32] = "?WC2 $v00[0x0], 0x000($00)";
-static const char digits[16] = {
- '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'
-};
-
-NOINLINE void res_lsw(
- unsigned vt,
- unsigned element,
- signed offset,
- unsigned base)
+NOINLINE void
+res_lsw(unsigned vt, unsigned element, signed offset, unsigned base)
{
- transfer_debug[10] = '0' + (unsigned char)vt/10;
- transfer_debug[11] = '0' + (unsigned char)vt%10;
-
- transfer_debug[15] = digits[element & 0xF];
-
- transfer_debug[21] = digits[(offset & 0xFFF) >> 8];
- transfer_debug[22] = digits[(offset & 0x0FF) >> 4];
- transfer_debug[23] = digits[(offset & 0x00F) >> 0];
-
- transfer_debug[26] = '0' + (unsigned char)base/10;
- transfer_debug[27] = '0' + (unsigned char)base%10;
-
- message(transfer_debug);
+ message("Reserved vector unit transfer operation.");
+ if (vt != element + base || offset != 0) /* unused parameters */
+ return;
return;
}
@@ -1652,7 +1646,7 @@ void STV(unsigned vt, unsigned element, signed offset, unsigned base)
int temp_PC;
#ifdef WAIT_FOR_CPU_HOST
-short MFC0_count[32];
+short MFC0_count[NUMBER_OF_SCALAR_REGISTERS];
#endif
mwc2_func LWC2[2 * 8*2] = {
@@ -1828,7 +1822,7 @@ PROFILE_MODE void MWC2_load(u32 inst)
offset <<= 5 + 4; /* safe on x86, skips 5-bit rd, 4-bit element */
offset >>= 5 + 4;
#else
- offset = (inst & 64) ? -(s16)(~inst%64 + 1) : inst % 64;
+ offset = (inst & 64) ? -(s16)(~inst%64 + 1) : (s16)(inst % 64);
#endif
LWC2[IW_RD(inst)](vt, element, offset, base);
}
@@ -1844,7 +1838,7 @@ PROFILE_MODE void MWC2_store(u32 inst)
offset <<= 5 + 4; /* safe on x86, skips 5-bit rd, 4-bit element */
offset >>= 5 + 4;
#else
- offset = (inst & 64) ? -(s16)(~inst%64 + 1) : inst % 64;
+ offset = (inst & 64) ? -(s16)(~inst%64 + 1) : (s16)(inst % 64);
#endif
SWC2[IW_RD(inst)](vt, element, offset, base);
}
diff --git a/su.h b/su.h
index 0e46af16..f0f7d430 100644
--- a/su.h
+++ b/su.h
@@ -1,7 +1,7 @@
/******************************************************************************\
* Project: Basic MIPS R4000 Instruction Set for Scalar Unit Operations *
* Authors: Iconoclast *
-* Release: 2016.11.05 *
+* Release: 2018.03.17 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
@@ -22,8 +22,6 @@
#include "my_types.h"
#include "rsp.h"
-#define EXTERN_COMMAND_LIST_GBI
-#define EXTERN_COMMAND_LIST_ABI
#define SEMAPHORE_LOCK_CORRECTIONS
#define WAIT_FOR_CPU_HOST
@@ -34,10 +32,10 @@
/*
* Currently, the plugin system this module is written for doesn't notify us
- * of how much RDRAM is installed to the system, so we have to presume 8 MiB.
+ * of how much RDRAM is installed in the system, so we'll use signal handlers
+ * to catch memory segment access faults during a trial search that finds out.
*/
-#define MAX_DRAM_ADDR 0x007FFFFFul
-#define MAX_DRAM_DMA_ADDR (MAX_DRAM_ADDR & ~7)
+extern unsigned long su_max_address;
/*
* Interact with memory using server-side byte order (MIPS big-endian) or
@@ -79,7 +77,9 @@
typedef enum {
zero = 0,
+
at = 1,
+
#ifdef TRUE_MIPS_AND_NOT_JUST_THE_RSP_SUBSET
v0 = 2,
v1 = 3,
@@ -117,7 +117,9 @@ typedef enum {
sp = 29,
fp = 30, /* new, official MIPS name for it: "frame pointer" */
ra = 31,
- S8 = fp
+
+ NUMBER_OF_SCALAR_REGISTERS,
+ S8 = fp /* older name for GPR $fp as of the R4000 ISA */
} GPR_specifier;
extern RSP_INFO RSP_INFO_NAME;
@@ -125,7 +127,7 @@ extern pu8 DRAM;
extern pu8 DMEM;
extern pu8 IMEM;
-extern u8 conf[32];
+extern u8 conf[];
/*
* general-purpose scalar registers
@@ -133,7 +135,7 @@ extern u8 conf[32];
* based on the MIPS instruction set architecture but without most of the
* original register names (for example, no kernel-reserved registers)
*/
-extern u32 SR[32];
+extern u32 SR[];
#define FIT_IMEM(PC) ((PC) & 0xFFFu & 0xFFCu)
@@ -155,7 +157,7 @@ int stage;
extern int temp_PC;
#ifdef WAIT_FOR_CPU_HOST
-extern short MFC0_count[32];
+extern short MFC0_count[];
/* Keep one C0 MF status read count for each scalar register. */
#endif
@@ -266,8 +268,28 @@ extern void set_PC(unsigned int address);
#define SP_STATUS_SIG6 (0x00000001ul << 13)
#define SP_STATUS_SIG7 (0x00000001ul << 14)
-#define NUMBER_OF_CP0_REGISTERS 16
-extern pu32 CR[NUMBER_OF_CP0_REGISTERS];
+typedef enum { /* typedef, so the header names a type and defines no object */
+ RCP_SP_MEM_ADDR_REG,
+ RCP_SP_DRAM_ADDR_REG,
+ RCP_SP_RD_LEN_REG,
+ RCP_SP_WR_LEN_REG,
+ RCP_SP_STATUS_REG,
+ RCP_SP_DMA_FULL_REG,
+ RCP_SP_DMA_BUSY_REG,
+ RCP_SP_SEMAPHORE_REG,
+
+ RCP_DPC_START_REG,
+ RCP_DPC_END_REG,
+ RCP_DPC_CURRENT_REG,
+ RCP_DPC_STATUS_REG,
+ RCP_DPC_CLOCK_REG,
+ RCP_DPC_BUFBUSY_REG,
+ RCP_DPC_PIPEBUSY_REG,
+ RCP_DPC_TMEM_REG,
+
+ NUMBER_OF_CP0_REGISTERS /* count of the 16 entries above; keep it last */
+} CPR_specifier;
+extern pu32 CR[];
extern void SP_DMA_READ(void);
extern void SP_DMA_WRITE(void);
diff --git a/vu/add.c b/vu/add.c
index 06e2fa4d..836f414b 100644
--- a/vu/add.c
+++ b/vu/add.c
@@ -1,7 +1,7 @@
/******************************************************************************\
* Project: MSP Simulation Layer for Vector Unit Computational Adds *
* Authors: Iconoclast *
-* Release: 2016.03.23 *
+* Release: 2018.03.18 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
@@ -72,7 +72,7 @@ static INLINE void SIGNED_CLAMP_ADD(pi16 VD, pi16 VS, pi16 VT)
{
i32 sum[N];
i16 hi[N], lo[N];
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
sum[i] = VS[i] + VT[i] + cf_co[i];
@@ -93,7 +93,7 @@ static INLINE void SIGNED_CLAMP_SUB(pi16 VD, pi16 VS, pi16 VT)
{
i32 dif[N];
i16 hi[N], lo[N];
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
dif[i] = VS[i] - VT[i] - cf_co[i];
@@ -114,7 +114,7 @@ static INLINE void SIGNED_CLAMP_SUB(pi16 VD, pi16 VS, pi16 VT)
INLINE static void clr_ci(pi16 VD, pi16 VS, pi16 VT)
{ /* clear CARRY and carry in to accumulators */
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
VACC_L[i] = VS[i] + VT[i] + cf_co[i];
@@ -128,7 +128,7 @@ INLINE static void clr_ci(pi16 VD, pi16 VS, pi16 VT)
INLINE static void clr_bi(pi16 VD, pi16 VS, pi16 VT)
{ /* clear CARRY and borrow in to accumulators */
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
VACC_L[i] = VS[i] - VT[i] - cf_co[i];
@@ -151,7 +151,7 @@ INLINE static void do_abs(pi16 VD, pi16 VS, pi16 VT)
i16 neg[N], pos[N];
i16 nez[N], cch[N]; /* corner case hack -- abs(-32768) == +32767 */
ALIGNED i16 res[N];
- register int i;
+ register unsigned int i;
vector_copy(res, VT);
for (i = 0; i < N; i++)
@@ -180,7 +180,7 @@ INLINE static void do_abs(pi16 VD, pi16 VS, pi16 VT)
INLINE static void set_co(pi16 VD, pi16 VS, pi16 VT)
{ /* set CARRY and carry out from sum */
i32 sum[N];
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
sum[i] = (u16)(VS[i]) + (u16)(VT[i]);
@@ -197,7 +197,7 @@ INLINE static void set_co(pi16 VD, pi16 VS, pi16 VT)
INLINE static void set_bo(pi16 VD, pi16 VS, pi16 VT)
{ /* set CARRY and borrow out from difference */
i32 dif[N];
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
dif[i] = (u16)(VS[i]) - (u16)(VT[i]);
diff --git a/vu/multiply.c b/vu/multiply.c
index d2ccd70f..dcf9504b 100644
--- a/vu/multiply.c
+++ b/vu/multiply.c
@@ -1,7 +1,7 @@
/******************************************************************************\
* Project: MSP Simulation Layer for Vector Unit Computational Multiplies *
* Authors: Iconoclast *
-* Release: 2015.11.30 *
+* Release: 2018.03.18 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
@@ -16,35 +16,24 @@
#include "multiply.h"
#ifdef ARCH_MIN_SSE2
-#define _mm_cmple_epu16(dst, src) \
- _mm_cmpeq_epi16(_mm_subs_epu16(dst, src), _mm_setzero_si128())
-#define _mm_cmpgt_epu16(dst, src) \
- _mm_andnot_si128(_mm_cmpeq_epi16(dst, src), _mm_cmple_epu16(src, dst))
-#define _mm_cmplt_epu16(dst, src) \
- _mm_cmpgt_epu16(src, dst)
-#define _mm_mullo_epu16(dst, src) \
- _mm_mullo_epi16(dst, src)
+#define _mm_allones_si128() \
+ _mm_cmpeq_epi16(_mm_setzero_si128(), _mm_setzero_si128())
+#define _mm_setmin_epi16() \
+ _mm_slli_epi16(_mm_allones_si128(), 15)
-static INLINE void SIGNED_CLAMP_AM(pi16 VD)
-{ /* typical sign-clamp of accumulator-mid (bits 31:16) */
- v16 dst, src;
- v16 pvd, pvs;
+#define _mm_cmplt_epu16(dst, src) \
+ _mm_cmplt_epi16( \
+ _mm_xor_si128(dst, _mm_setmin_epi16()), \
+ _mm_xor_si128(src, _mm_setmin_epi16()) \
+ )
- pvs = _mm_load_si128((v16 *)VACC_H);
- pvd = _mm_load_si128((v16 *)VACC_M);
- dst = _mm_unpacklo_epi16(pvd, pvs);
- src = _mm_unpackhi_epi16(pvd, pvs);
-
- dst = _mm_packs_epi32(dst, src);
- _mm_store_si128((v16 *)VD, dst);
- return;
-}
#else
+
static INLINE void SIGNED_CLAMP_AM(pi16 VD)
{ /* typical sign-clamp of accumulator-mid (bits 31:16) */
i16 hi[N], lo[N];
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
lo[i] = (VACC_H[i] < ~0);
@@ -61,15 +50,13 @@ static INLINE void SIGNED_CLAMP_AM(pi16 VD)
VD[i] |= -(hi[i] ^ 0);
for (i = 0; i < N; i++)
VD[i] ^= 0x8000 * (hi[i] | lo[i]);
- return;
}
-#endif
static INLINE void UNSIGNED_CLAMP(pi16 VD)
{ /* sign-zero hybrid clamp of accumulator-mid (bits 31:16) */
ALIGNED i16 temp[N];
i16 cond[N];
- register int i;
+ register unsigned int i;
SIGNED_CLAMP_AM(temp); /* no direct map in SSE, but closely based on this */
for (i = 0; i < N; i++)
@@ -78,14 +65,13 @@ static INLINE void UNSIGNED_CLAMP(pi16 VD)
VD[i] = temp[i] & ~(temp[i] >> 15); /* Only this clamp is unsigned. */
for (i = 0; i < N; i++)
VD[i] = VD[i] | cond[i];
- return;
}
static INLINE void SIGNED_CLAMP_AL(pi16 VD)
{ /* sign-clamp accumulator-low (bits 15:0) */
ALIGNED i16 temp[N];
i16 cond[N];
- register int i;
+ register unsigned int i;
SIGNED_CLAMP_AM(temp); /* no direct map in SSE, but closely based on this */
for (i = 0; i < N; i++)
@@ -94,64 +80,8 @@ static INLINE void SIGNED_CLAMP_AL(pi16 VD)
temp[i] ^= 0x8000; /* clamps 0x0000:0xFFFF instead of -0x8000:+0x7FFF */
for (i = 0; i < N; i++)
VD[i] = (cond[i] ? temp[i] : VACC_L[i]);
- return;
-}
-
-INLINE static void do_macf(pi16 VD, pi16 VS, pi16 VT)
-{
- i32 product[N];
- u32 addend[N];
- register int i;
-
- for (i = 0; i < N; i++)
- product[i] = VS[i] * VT[i];
- for (i = 0; i < N; i++)
- addend[i] = (product[i] << 1) & 0x00000000FFFF;
- for (i = 0; i < N; i++)
- addend[i] = (u16)(VACC_L[i]) + addend[i];
- for (i = 0; i < N; i++)
- VACC_L[i] = (i16)(addend[i]);
- for (i = 0; i < N; i++)
- addend[i] = (addend[i] >> 16) + (u16)(product[i] >> 15);
- for (i = 0; i < N; i++)
- addend[i] = (u16)(VACC_M[i]) + addend[i];
- for (i = 0; i < N; i++)
- VACC_M[i] = (i16)(addend[i]);
- for (i = 0; i < N; i++)
- VACC_H[i] -= (product[i] < 0);
- for (i = 0; i < N; i++)
- VACC_H[i] += addend[i] >> 16;
- SIGNED_CLAMP_AM(VD);
- return;
-}
-
-INLINE static void do_macu(pi16 VD, pi16 VS, pi16 VT)
-{
- i32 product[N];
- u32 addend[N];
- register int i;
-
- for (i = 0; i < N; i++)
- product[i] = VS[i] * VT[i];
- for (i = 0; i < N; i++)
- addend[i] = (product[i] << 1) & 0x00000000FFFF;
- for (i = 0; i < N; i++)
- addend[i] = (u16)(VACC_L[i]) + addend[i];
- for (i = 0; i < N; i++)
- VACC_L[i] = (i16)(addend[i]);
- for (i = 0; i < N; i++)
- addend[i] = (addend[i] >> 16) + (u16)(product[i] >> 15);
- for (i = 0; i < N; i++)
- addend[i] = (u16)(VACC_M[i]) + addend[i];
- for (i = 0; i < N; i++)
- VACC_M[i] = (i16)(addend[i]);
- for (i = 0; i < N; i++)
- VACC_H[i] -= (product[i] < 0);
- for (i = 0; i < N; i++)
- VACC_H[i] += addend[i] >> 16;
- UNSIGNED_CLAMP(VD);
- return;
}
+#endif
VECTOR_OPERATION VMULF(v16 vs, v16 vt)
{
@@ -202,8 +132,7 @@ VECTOR_OPERATION VMULF(v16 vs, v16 vt)
negative = _mm_xor_si128(negative, vs);
*(v16 *)VACC_H = negative; /* 2*i16*i16 only fills L/M; VACC_H = 0 or ~0. */
- vs = _mm_add_epi16(vs, prod_hi); /* prod_hi must be -32768; + -1 = +32767 */
- return (vs);
+ return _mm_add_epi16(vs, prod_hi); /* prod_hi must be -32768; - 1 = +32767 */
#else
word_64 product[N]; /* (-32768 * -32768)<<1 + 32768 confuses 32-bit type. */
register unsigned int i;
@@ -221,7 +150,6 @@ VECTOR_OPERATION VMULF(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] = -(product[i].SW < 0); /* product>>32 & 0xFFFF */
SIGNED_CLAMP_AM(V_result);
- return;
#endif
}
@@ -266,8 +194,7 @@ VECTOR_OPERATION VMULU(v16 vs, v16 vt)
prod_lo = _mm_srai_epi16(prod_hi, 15); /* unsigned overflow mask */
vs = _mm_or_si128(prod_hi, prod_lo);
- vs = _mm_andnot_si128(negative, vs); /* unsigned underflow mask */
- return (vs);
+ return _mm_andnot_si128(negative, vs); /* unsigned underflow mask */
#else
word_64 product[N]; /* (-32768 * -32768)<<1 + 32768 confuses 32-bit type. */
register unsigned int i;
@@ -285,7 +212,6 @@ VECTOR_OPERATION VMULU(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] = -(product[i].SW < 0); /* product>>32 & 0xFFFF */
UNSIGNED_CLAMP(V_result);
- return;
#endif
}
@@ -309,7 +235,6 @@ VECTOR_OPERATION VMUDL(v16 vs, v16 vt)
vector_copy(V_result, VACC_L);
vector_wipe(VACC_M);
vector_wipe(VACC_H);
- return;
#endif
}
@@ -348,7 +273,6 @@ VECTOR_OPERATION VMUDM(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] = -(VACC_M[i] < 0);
vector_copy(V_result, VACC_M);
- return;
#endif
}
@@ -372,7 +296,7 @@ VECTOR_OPERATION VMUDN(v16 vs, v16 vt)
*(v16 *)VACC_M = prod_hi;
prod_hi = _mm_srai_epi16(prod_hi, 15);
*(v16 *)VACC_H = prod_hi;
- return (vs = prod_lo);
+ return (prod_lo);
#else
word_32 product[N];
register unsigned int i;
@@ -386,7 +310,6 @@ VECTOR_OPERATION VMUDN(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] = -(VACC_M[i] < 0);
vector_copy(V_result, VACC_L);
- return;
#endif
}
@@ -413,8 +336,7 @@ VECTOR_OPERATION VMUDH(v16 vs, v16 vt)
* Re-interleave or pack both 32-bit products in both xmm registers with
* signed saturation: prod < -32768 to -32768 and prod > +32767 to +32767.
*/
- vs = _mm_packs_epi32(vs, vt);
- return (vs);
+ return _mm_packs_epi32(vs, vt);
#else
word_32 product[N];
register unsigned int i;
@@ -427,57 +349,143 @@ VECTOR_OPERATION VMUDH(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] = (s16)(product[i].W >> 16); /* product[i].HW[HES(2) >> 1] */
SIGNED_CLAMP_AM(V_result);
- return;
#endif
}
VECTOR_OPERATION VMACF(v16 vs, v16 vt)
{
- ALIGNED i16 VD[N];
#ifdef ARCH_MIN_SSE2
- ALIGNED i16 VS[N], VT[N];
+ v16 acc_hi, acc_md, acc_lo;
+ v16 prod_hi, prod_lo;
+ v16 overflow, overflow_new;
+ v16 prod_neg, carry;
- *(v16 *)VS = vs;
- *(v16 *)VT = vt;
-#else
- v16 VS, VT;
+ prod_hi = _mm_mulhi_epi16(vs, vt);
+ prod_lo = _mm_mullo_epi16(vs, vt);
+ prod_neg = _mm_srli_epi16(prod_hi, 15);
- VS = vs;
- VT = vt;
-#endif
- do_macf(VD, VS, VT);
-#ifdef ARCH_MIN_SSE2
- COMPILER_FENCE();
- vs = *(v16 *)VD;
- return (vs);
+ /* fractional adjustment by shifting left one bit */
+ overflow = _mm_srli_epi16(prod_lo, 15); /* hi bit lost when s16 += s16 */
+ prod_lo = _mm_add_epi16(prod_lo, prod_lo);
+ prod_hi = _mm_add_epi16(prod_hi, prod_hi);
+ prod_hi = _mm_or_si128(prod_hi, overflow); /* Carry lo's MSB to hi's LSB. */
+
+ acc_lo = *(v16 *)VACC_L;
+ acc_md = *(v16 *)VACC_M;
+ acc_hi = *(v16 *)VACC_H;
+
+ acc_lo = _mm_add_epi16(acc_lo, prod_lo);
+ *(v16 *)VACC_L = acc_lo;
+ overflow = _mm_cmplt_epu16(acc_lo, prod_lo); /* a + b < a + 0 ? ~0 : 0 */
+
+ acc_md = _mm_add_epi16(acc_md, prod_hi);
+ overflow_new = _mm_cmplt_epu16(acc_md, prod_hi);
+ acc_md = _mm_sub_epi16(acc_md, overflow); /* m - (overflow = ~0) == m + 1 */
+ carry = _mm_cmpeq_epi16(acc_md, _mm_setzero_si128());
+ carry = _mm_and_si128(carry, overflow); /* ~0 - (-1) == 0 && (-1) != 0 */
+ *(v16 *)VACC_M = acc_md;
+ overflow = _mm_or_si128(carry, overflow_new);
+
+ acc_hi = _mm_sub_epi16(acc_hi, overflow);
+ acc_hi = _mm_sub_epi16(acc_hi, prod_neg);
+ *(v16 *)VACC_H = acc_hi;
+
+ vt = _mm_unpackhi_epi16(acc_md, acc_hi);
+ vs = _mm_unpacklo_epi16(acc_md, acc_hi);
+ return _mm_packs_epi32(vs, vt);
#else
- vector_copy(V_result, VD);
- return;
+ word_32 product[N], addend[N];
+ register unsigned int i;
+
+ for (i = 0; i < N; i++)
+ product[i].SW = vs[i] * vt[i];
+ for (i = 0; i < N; i++)
+ addend[i].UW = (product[i].SW << 1) & 0x00000000FFFF;
+ for (i = 0; i < N; i++)
+ addend[i].UW = (u16)(VACC_L[i]) + addend[i].UW;
+ for (i = 0; i < N; i++)
+ VACC_L[i] = (i16)(addend[i].UW);
+ for (i = 0; i < N; i++)
+ addend[i].UW = (addend[i].UW >> 16) + (u16)(product[i].SW >> 15);
+ for (i = 0; i < N; i++)
+ addend[i].UW = (u16)(VACC_M[i]) + addend[i].UW;
+ for (i = 0; i < N; i++)
+ VACC_M[i] = (i16)(addend[i].UW);
+ for (i = 0; i < N; i++)
+ VACC_H[i] -= (product[i].SW < 0);
+ for (i = 0; i < N; i++)
+ VACC_H[i] += addend[i].UW >> 16;
+ SIGNED_CLAMP_AM(V_result);
#endif
}
VECTOR_OPERATION VMACU(v16 vs, v16 vt)
{
- ALIGNED i16 VD[N];
#ifdef ARCH_MIN_SSE2
- ALIGNED i16 VS[N], VT[N];
+ v16 acc_hi, acc_md, acc_lo;
+ v16 prod_hi, prod_lo;
+ v16 overflow, overflow_new;
+ v16 prod_neg, carry;
- *(v16 *)VS = vs;
- *(v16 *)VT = vt;
-#else
- v16 VS, VT;
+ prod_hi = _mm_mulhi_epi16(vs, vt);
+ prod_lo = _mm_mullo_epi16(vs, vt);
+ prod_neg = _mm_srli_epi16(prod_hi, 15);
- VS = vs;
- VT = vt;
-#endif
- do_macu(VD, VS, VT);
-#ifdef ARCH_MIN_SSE2
- COMPILER_FENCE();
- vs = *(v16 *)VD;
- return (vs);
+ /* fractional adjustment by shifting left one bit */
+ overflow = _mm_srli_epi16(prod_lo, 15); /* hi bit lost when s16 += s16 */
+ prod_lo = _mm_add_epi16(prod_lo, prod_lo);
+ prod_hi = _mm_add_epi16(prod_hi, prod_hi);
+ prod_hi = _mm_or_si128(prod_hi, overflow); /* Carry lo's MSB to hi's LSB. */
+
+ acc_lo = *(v16 *)VACC_L;
+ acc_md = *(v16 *)VACC_M;
+ acc_hi = *(v16 *)VACC_H;
+
+ acc_lo = _mm_add_epi16(acc_lo, prod_lo);
+ *(v16 *)VACC_L = acc_lo;
+ overflow = _mm_cmplt_epu16(acc_lo, prod_lo); /* a + b < a + 0 ? ~0 : 0 */
+
+ acc_md = _mm_add_epi16(acc_md, prod_hi);
+ overflow_new = _mm_cmplt_epu16(acc_md, prod_hi);
+ acc_md = _mm_sub_epi16(acc_md, overflow); /* m - (overflow = ~0) == m + 1 */
+ carry = _mm_cmpeq_epi16(acc_md, _mm_setzero_si128());
+ carry = _mm_and_si128(carry, overflow); /* ~0 - (-1) == 0 && (-1) != 0 */
+ *(v16 *)VACC_M = acc_md;
+ overflow = _mm_or_si128(carry, overflow_new);
+
+ acc_hi = _mm_sub_epi16(acc_hi, overflow);
+ acc_hi = _mm_sub_epi16(acc_hi, prod_neg);
+ *(v16 *)VACC_H = acc_hi;
+
+ vt = _mm_unpackhi_epi16(acc_md, acc_hi);
+ vs = _mm_unpacklo_epi16(acc_md, acc_hi);
+ vs = _mm_packs_epi32(vs, vt);
+ overflow = _mm_cmplt_epi16(acc_md, vs);
+ vs = _mm_andnot_si128(_mm_srai_epi16(vs, 15), vs);
+ return _mm_or_si128(vs, overflow);
#else
- vector_copy(V_result, VD);
- return;
+ word_32 product[N], addend[N];
+ register unsigned int i;
+
+ for (i = 0; i < N; i++)
+ product[i].SW = vs[i] * vt[i];
+ for (i = 0; i < N; i++)
+ addend[i].UW = (product[i].SW << 1) & 0x00000000FFFF;
+ for (i = 0; i < N; i++)
+ addend[i].UW = (u16)(VACC_L[i]) + addend[i].UW;
+ for (i = 0; i < N; i++)
+ VACC_L[i] = (i16)(addend[i].UW);
+ for (i = 0; i < N; i++)
+ addend[i].UW = (addend[i].UW >> 16) + (u16)(product[i].SW >> 15);
+ for (i = 0; i < N; i++)
+ addend[i].UW = (u16)(VACC_M[i]) + addend[i].UW;
+ for (i = 0; i < N; i++)
+ VACC_M[i] = (i16)(addend[i].UW);
+ for (i = 0; i < N; i++)
+ VACC_H[i] -= (product[i].SW < 0);
+ for (i = 0; i < N; i++)
+ VACC_H[i] += addend[i].UW >> 16;
+ UNSIGNED_CLAMP(V_result);
#endif
}
@@ -488,7 +496,7 @@ VECTOR_OPERATION VMADL(v16 vs, v16 vt)
v16 prod_hi;
v16 overflow, overflow_new;
- /* prod_lo = _mm_mullo_epu16(vs, vt); */
+ /* prod_lo = _mm_mullo_epi16(vs, vt); */
prod_hi = _mm_mulhi_epu16(vs, vt);
acc_lo = *(v16 *)VACC_L;
@@ -531,8 +539,7 @@ VECTOR_OPERATION VMADL(v16 vs, v16 vt)
vs = _mm_and_si128(vs, acc_md); /* ... ? VS_clamped : 0x0000 */
vs = _mm_or_si128(vs, acc_lo); /* : acc_lo */
acc_md = _mm_slli_epi16(acc_md, 15); /* ... ? ^ 0x8000 : ^ 0x0000 */
- vs = _mm_xor_si128(vs, acc_md); /* Stupid unsigned-clamp-ish adjustment. */
- return (vs);
+ return _mm_xor_si128(vs, acc_md); /* stupid unsigned-clamp-ish adjustment */
#else
word_32 product[N], addend[N];
register unsigned int i;
@@ -552,7 +559,6 @@ VECTOR_OPERATION VMADL(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] += addend[i].UW >> 16;
SIGNED_CLAMP_AL(V_result);
- return;
#endif
}
@@ -594,8 +600,7 @@ VECTOR_OPERATION VMADM(v16 vs, v16 vt)
vt = _mm_unpackhi_epi16(acc_md, acc_hi);
vs = _mm_unpacklo_epi16(acc_md, acc_hi);
- vs = _mm_packs_epi32(vs, vt);
- return (vs);
+ return _mm_packs_epi32(vs, vt);
#else
word_32 product[N], addend[N];
register unsigned int i;
@@ -615,7 +620,6 @@ VECTOR_OPERATION VMADM(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] += addend[i].UW >> 16;
SIGNED_CLAMP_AM(V_result);
- return;
#endif
}
@@ -675,8 +679,7 @@ VECTOR_OPERATION VMADN(v16 vs, v16 vt)
vs = _mm_and_si128(vs, acc_md); /* ... ? VS_clamped : 0x0000 */
vs = _mm_or_si128(vs, acc_lo); /* : acc_lo */
acc_md = _mm_slli_epi16(acc_md, 15); /* ... ? ^ 0x8000 : ^ 0x0000 */
- vs = _mm_xor_si128(vs, acc_md); /* Stupid unsigned-clamp-ish adjustment. */
- return (vs);
+ return _mm_xor_si128(vs, acc_md); /* stupid unsigned-clamp-ish adjustment */
#else
word_32 product[N], addend[N];
register unsigned int i;
@@ -696,7 +699,6 @@ VECTOR_OPERATION VMADN(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] += addend[i].UW >> 16;
SIGNED_CLAMP_AL(V_result);
- return;
#endif
}
@@ -734,8 +736,7 @@ VECTOR_OPERATION VMADH(v16 vs, v16 vt)
vs = *(v16 *)VACC_M;
prod_high = _mm_unpackhi_epi16(vs, vt);
vs = _mm_unpacklo_epi16(vs, vt);
- vs = _mm_packs_epi32(vs, prod_high);
- return (vs);
+ return _mm_packs_epi32(vs, prod_high);
#else
word_32 product[N], addend[N];
register unsigned int i;
@@ -749,6 +750,5 @@ VECTOR_OPERATION VMADH(v16 vs, v16 vt)
for (i = 0; i < N; i++)
VACC_H[i] += (addend[i].UW >> 16) + (product[i].SW >> 16);
SIGNED_CLAMP_AM(V_result);
- return;
#endif
}
diff --git a/vu/select.c b/vu/select.c
index a1c50925..3d290cee 100644
--- a/vu/select.c
+++ b/vu/select.c
@@ -1,7 +1,7 @@
/******************************************************************************\
* Project: MSP Simulation Layer for Vector Unit Computational Test Selects *
* Authors: Iconoclast *
-* Release: 2015.01.30 *
+* Release: 2018.03.18 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
@@ -29,7 +29,7 @@
*/
static void merge(pi16 VD, pi16 cmp, pi16 pass, pi16 fail)
{
- register int i;
+ register unsigned int i;
#if (0 != 0)
/* Do not use this version yet, as it still does not vectorize to SSE2. */
for (i = 0; i < N; i++)
@@ -49,7 +49,7 @@ INLINE static void do_lt(pi16 VD, pi16 VS, pi16 VT)
{
i16 cn[N];
i16 eq[N];
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
eq[i] = (VS[i] == VT[i]);
@@ -75,7 +75,7 @@ INLINE static void do_lt(pi16 VD, pi16 VS, pi16 VT)
INLINE static void do_eq(pi16 VD, pi16 VS, pi16 VT)
{
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
cf_comp[i] = (VS[i] == VT[i]);
@@ -98,7 +98,7 @@ INLINE static void do_eq(pi16 VD, pi16 VS, pi16 VT)
INLINE static void do_ne(pi16 VD, pi16 VS, pi16 VT)
{
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
cf_comp[i] = (VS[i] != VT[i]);
@@ -123,7 +123,7 @@ INLINE static void do_ge(pi16 VD, pi16 VS, pi16 VT)
{
i16 ce[N];
i16 eq[N];
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
eq[i] = (VS[i] == VT[i]);
@@ -154,7 +154,7 @@ INLINE static void do_cl(pi16 VD, pi16 VS, pi16 VT)
ALIGNED i16 gen[N], len[N], lz[N], uz[N], sn[N];
i16 diff[N];
i16 cmp[N];
- register int i;
+ register unsigned int i;
vector_copy((pi16)VB, VS);
vector_copy((pi16)VC, VT);
@@ -230,7 +230,7 @@ INLINE static void do_ch(pi16 VD, pi16 VS, pi16 VT)
i16 diff[N];
#endif
i16 cch[N]; /* corner case hack: -(-32768) with undefined sign */
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
cch[i] = (VT[i] == -32768) ? ~0 : 0; /* -(-32768) might not be >= 0. */
@@ -297,7 +297,7 @@ INLINE static void do_cr(pi16 VD, pi16 VS, pi16 VT)
ALIGNED i16 ge[N], le[N], sn[N];
ALIGNED i16 VC[N];
i16 cmp[N];
- register int i;
+ register unsigned int i;
vector_copy(VC, VT);
for (i = 0; i < N; i++)
diff --git a/vu/vu.c b/vu/vu.c
index 1763a626..0af953a5 100644
--- a/vu/vu.c
+++ b/vu/vu.c
@@ -1,7 +1,7 @@
/******************************************************************************\
* Project: MSP Emulation Layer for Vector Unit Computational Operations *
* Authors: Iconoclast *
-* Release: 2016.03.23 *
+* Release: 2018.03.18 *
* License: CC0 Public Domain Dedication *
* *
* To the extent possible under law, the author(s) have dedicated all copyright *
@@ -133,19 +133,20 @@ u16 get_VCC(void)
}
u8 get_VCE(void)
{
- int result;
+ unsigned int result;
register u8 vce;
result = 0x00
- | (cf_vce[07] << 0x7)
- | (cf_vce[06] << 0x6)
- | (cf_vce[05] << 0x5)
- | (cf_vce[04] << 0x4)
- | (cf_vce[03] << 0x3)
- | (cf_vce[02] << 0x2)
- | (cf_vce[01] << 0x1)
- | (cf_vce[00] << 0x0);
- vce = result & 0xFF;
+ | (cf_vce[0x7] << 0x7)
+ | (cf_vce[0x6] << 0x6)
+ | (cf_vce[0x5] << 0x5)
+ | (cf_vce[0x4] << 0x4)
+ | (cf_vce[0x3] << 0x3)
+ | (cf_vce[0x2] << 0x2)
+ | (cf_vce[0x1] << 0x1)
+ | (cf_vce[0x0] << 0x0)
+ ;
+ vce = (u8)(result & 0xFF);
return (vce); /* Big endian becomes little. */
}
#else
@@ -207,7 +208,7 @@ u8 get_VCE(void)
*/
void set_VCO(u16 vco)
{
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
cf_co[i] = (vco >> (i + 0x0)) & 1;
@@ -217,7 +218,7 @@ void set_VCO(u16 vco)
}
void set_VCC(u16 vcc)
{
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
cf_comp[i] = (vcc >> (i + 0x0)) & 1;
@@ -227,7 +228,7 @@ void set_VCC(u16 vcc)
}
void set_VCE(u8 vce)
{
- register int i;
+ register unsigned int i;
for (i = 0; i < N; i++)
cf_vce[i] = (vce >> i) & 1;