From ee9675389d1095a6d9a650daeb48eb2e9090fdcf Mon Sep 17 00:00:00 2001 From: Darryl Pogue Date: Sun, 19 Feb 2023 13:30:19 -0800 Subject: [PATCH] Initial pass of avatar rendering (with no textures) --- .../FeatureLib/pfDXPipeline/plDXPipeline.cpp | 167 +----------------- .../FeatureLib/pfDXPipeline/plDXPipeline.h | 1 - .../FeatureLib/pfGLPipeline/plGLDevice.cpp | 15 +- .../FeatureLib/pfGLPipeline/plGLPipeline.cpp | 161 +++++++++++++++-- .../FeatureLib/pfGLPipeline/plGLPipeline.h | 1 + .../PubUtilLib/plPipeline/pl3DPipeline.cpp | 10 ++ .../PubUtilLib/plPipeline/pl3DPipeline.h | 161 +++++++++++++++++ 7 files changed, 339 insertions(+), 177 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfDXPipeline/plDXPipeline.cpp b/Sources/Plasma/FeatureLib/pfDXPipeline/plDXPipeline.cpp index 48394d15b0..c0a9ac628e 100644 --- a/Sources/Plasma/FeatureLib/pfDXPipeline/plDXPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfDXPipeline/plDXPipeline.cpp @@ -282,13 +282,10 @@ plProfile_Extern(LayChange); plProfile_Extern(DrawTriangles); plProfile_Extern(MatChange); plProfile_Extern(NumSkin); - -plProfile_CreateCounterNoReset("Reload", "PipeC", PipeReload); - +plProfile_Extern(PipeReload); plProfile_Extern(PrepShadows); plProfile_Extern(PrepDrawable); plProfile_Extern(Skin); -plProfile_Extern(AvatarSort); plProfile_Extern(ClearLights); plProfile_Extern(RenderSpan); plProfile_Extern(MergeCheck); @@ -304,6 +301,13 @@ plProfile_Extern(RenderPrim); plProfile_Extern(PlateMgr); plProfile_Extern(DebugText); plProfile_Extern(Reset); +plProfile_Extern(AvRTPoolUsed); +plProfile_Extern(AvRTPoolCount); +plProfile_Extern(AvRTPoolRes); +plProfile_Extern(AvRTShrinkTime); +plProfile_Extern(SpanMerge); +plProfile_Extern(MatLightState); +plProfile_Extern(EmptyList); plProfile_CreateMemCounter("DefMem", "PipeC", DefaultMem); plProfile_CreateMemCounter("ManMem", "PipeC", ManagedMem); @@ -313,18 +317,10 @@ plProfile_CreateMemCounterReset("fTexUsed", "PipeC", fTexUsed); 
plProfile_CreateMemCounterReset("fTexManaged", "PipeC", fTexManaged); plProfile_CreateMemCounterReset("fVtxUsed", "PipeC", fVtxUsed); plProfile_CreateMemCounterReset("fVtxManaged", "PipeC", fVtxManaged); -plProfile_CreateCounter("Merge", "PipeC", SpanMerge); plProfile_CreateCounter("TexNum", "PipeC", NumTex); -plProfile_CreateCounter("LiState", "PipeC", MatLightState); -plProfile_CreateCounter("AvatarFaces", "PipeC", AvatarFaces); plProfile_CreateCounter("VertexChange", "PipeC", VertexChange); plProfile_CreateCounter("IndexChange", "PipeC", IndexChange); plProfile_CreateCounter("DynVBuffs", "PipeC", DynVBuffs); -plProfile_CreateCounter("EmptyList", "PipeC", EmptyList); -plProfile_CreateCounter("AvRTPoolUsed", "PipeC", AvRTPoolUsed); -plProfile_CreateCounter("AvRTPoolCount", "PipeC", AvRTPoolCount); -plProfile_CreateCounter("AvRTPoolRes", "PipeC", AvRTPoolRes); -plProfile_CreateCounter("AvRTShrinkTime", "PipeC", AvRTShrinkTime); #ifndef PLASMA_EXTERNAL_RELEASE /// Fun inlines for keeping track of surface creation/deletion memory @@ -2316,153 +2312,6 @@ bool plDXPipeline::PreRender(plDrawable* drawable, std::vector& visLis return !visList.empty(); } -struct plSortFace -{ - uint16_t fIdx[3]; - float fDist; -}; - -struct plCompSortFace -{ - bool operator()( const plSortFace& lhs, const plSortFace& rhs) const - { - return lhs.fDist > rhs.fDist; - } -}; - -// IAvatarSort ///////////////////////////////////////////////////////////////////////// -// We handle avatar sort differently from the rest of the face sort. The reason is that -// within the single avatar index buffer, we want to only sort the faces of spans requesting -// a sort, and sort them in place. -// Contrast that with the normal scene translucency sort. There, we sort all the spans in a drawble, -// then we sort all the faces in that drawable, then for each span in the sorted span list, we extract -// the faces for that span appending onto the index buffer. 
This gives great efficiency because -// only the visible faces are sorted and they wind up packed into the front of the index buffer, which -// permits more batching. See plDrawableSpans::SortVisibleSpans. -// For the avatar, it's generally the case that all the avatar is visible or not, and there is only -// one material, so neither of those efficiencies is helpful. Moreover, for the avatar the faces we -// want sorted are a tiny subset of the avatar's faces. Moreover, and most importantly, for the avatar, we -// want to preserve the order that spans are drawn, so, for example, the opaque base head will always be -// drawn before the translucent hair fringe, which will always be drawn before the pink clear plastic baseball cap. -bool plDXPipeline::IAvatarSort(plDrawableSpans* d, const std::vector& visList) -{ - plProfile_BeginTiming(AvatarSort); - for (int16_t visIdx : visList) - { - hsAssert(d->GetSpan(visIdx)->fTypeMask & plSpan::kIcicleSpan, "Unknown type for sorting faces"); - - plIcicle* span = (plIcicle*)d->GetSpan(visIdx); - - if( span->fProps & plSpan::kPartialSort ) - { - hsAssert(d->GetBufferGroup(span->fGroupIdx)->AreIdxVolatile(), "Badly setup buffer group - set PartialSort too late?"); - - const hsPoint3 viewPos = GetViewPositionWorld(); - - plGBufferGroup* group = d->GetBufferGroup(span->fGroupIdx); - - plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx); - - const uint8_t* vdata = vRef->fData; - const uint32_t stride = vRef->fVertexSize; - - const int numTris = span->fILength/3; - - static std::vector sortScratch; - sortScratch.resize(numTris); - - plProfile_IncCount(AvatarFaces, numTris); - - // - // Have three very similar sorts here, differing only on where the "position" of - // each triangle is defined, either as the center of the triangle, the nearest - // point on the triangle, or the farthest point on the triangle. 
- // Having tried all three on the avatar (the only thing this sort is used on), - // the best results surprisingly came from using the center of the triangle. - uint16_t* indices = group->GetIndexBufferData(span->fIBufferIdx) + span->fIStartIdx; - int j; - for( j = 0; j < numTris; j++ ) - { -#if 1 // TRICENTER - uint16_t idx = *indices++; - sortScratch[j].fIdx[0] = idx; - hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride); - - idx = *indices++; - sortScratch[j].fIdx[1] = idx; - pos += *(hsPoint3*)(vdata + idx * stride); - - idx = *indices++; - sortScratch[j].fIdx[2] = idx; - pos += *(hsPoint3*)(vdata + idx * stride); - - pos *= 0.3333f; - - sortScratch[j].fDist = hsVector3(&pos, &viewPos).MagnitudeSquared(); -#elif 0 // NEAREST - uint16_t idx = *indices++; - sortScratch[j].fIdx[0] = idx; - hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride); - float dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); - float minDist = dist; - - idx = *indices++; - sortScratch[j].fIdx[1] = idx; - pos = *(hsPoint3*)(vdata + idx * stride); - dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); - if( dist < minDist ) - minDist = dist; - - idx = *indices++; - sortScratch[j].fIdx[2] = idx; - pos = *(hsPoint3*)(vdata + idx * stride); - dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); - if( dist < minDist ) - minDist = dist; - - sortScratch[j].fDist = minDist; -#elif 1 // FURTHEST - uint16_t idx = *indices++; - sortScratch[j].fIdx[0] = idx; - hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride); - float dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); - float maxDist = dist; - - idx = *indices++; - sortScratch[j].fIdx[1] = idx; - pos = *(hsPoint3*)(vdata + idx * stride); - dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); - if( dist > maxDist ) - maxDist = dist; - - idx = *indices++; - sortScratch[j].fIdx[2] = idx; - pos = *(hsPoint3*)(vdata + idx * stride); - dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); - if( dist > maxDist ) - maxDist = dist; - - sortScratch[j].fDist = 
maxDist; -#endif // SORTTYPES - } - - std::sort(sortScratch.begin(), sortScratch.end(), plCompSortFace()); - - indices = group->GetIndexBufferData(span->fIBufferIdx) + span->fIStartIdx; - for (const plSortFace& iter : sortScratch) - { - *indices++ = iter.fIdx[0]; - *indices++ = iter.fIdx[1]; - *indices++ = iter.fIdx[2]; - } - - group->DirtyIndexBuffer(span->fIBufferIdx); - } - } - plProfile_EndTiming(AvatarSort); - return true; -} - // PrepForRender ////////////////////////////////////////////////////////////////// // Make sure the given drawable and each of the spans to be drawn (as noted in the // indices in visList) is ready to be rendered. diff --git a/Sources/Plasma/FeatureLib/pfDXPipeline/plDXPipeline.h b/Sources/Plasma/FeatureLib/pfDXPipeline/plDXPipeline.h index 76a1e1d1e5..3263055ee5 100644 --- a/Sources/Plasma/FeatureLib/pfDXPipeline/plDXPipeline.h +++ b/Sources/Plasma/FeatureLib/pfDXPipeline/plDXPipeline.h @@ -374,7 +374,6 @@ class plDXPipeline : public pl3DPipeline // Visualization of active occluders void IMakeOcclusionSnap(); - bool IAvatarSort(plDrawableSpans* d, const std::vector& visList); void IBlendVertsIntoBuffer( plSpan* span, hsMatrix44* matrixPalette, int numMatrices, const uint8_t *src, uint8_t format, uint32_t srcStride, diff --git a/Sources/Plasma/FeatureLib/pfGLPipeline/plGLDevice.cpp b/Sources/Plasma/FeatureLib/pfGLPipeline/plGLDevice.cpp index b693ebe7a9..2e132ed4df 100644 --- a/Sources/Plasma/FeatureLib/pfGLPipeline/plGLDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfGLPipeline/plGLDevice.cpp @@ -346,7 +346,7 @@ bool plGLDevice::InitDevice() // ANGLE. // // On Linux, this should be true with mesa or nvidia drivers. 
- if (epoxy_has_egl()) + if (epoxy_has_egl() && fContextType == kNone) InitEGLDevice(this); #endif @@ -456,6 +456,19 @@ bool plGLDevice::BeginRender() return false; } +#ifdef USE_EGL + if (fContextType == kEGL) { + EGLDisplay display = static_cast(fDisplay); + EGLContext context = static_cast(fContext); + EGLSurface surface = static_cast(fSurface); + + if (eglMakeCurrent(display, surface, surface, context) == EGL_FALSE) { + fErrorMsg = "Failed to attach EGL context to surface"; + return false; + } + } //else +#endif + return true; } diff --git a/Sources/Plasma/FeatureLib/pfGLPipeline/plGLPipeline.cpp b/Sources/Plasma/FeatureLib/pfGLPipeline/plGLPipeline.cpp index 4f4e5da991..4787bc6f19 100644 --- a/Sources/Plasma/FeatureLib/pfGLPipeline/plGLPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfGLPipeline/plGLPipeline.cpp @@ -61,7 +61,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "plPipeDebugFlags.h" #include "plPipeResReq.h" #include "plProfile.h" +#include "pnNetCommon/plNetApp.h" // for dbg logging #include "pnMessage/plPipeResMakeMsg.h" +#include "plAvatar/plAvatarClothing.h" +#include "plGImage/plMipmap.h" #include "plGLight/plLightInfo.h" #include "plPipeline/plCubicRenderTarget.h" #include "plPipeline/plDebugText.h" @@ -94,6 +97,14 @@ plProfile_Extern(PlateMgr); plProfile_Extern(DebugText); plProfile_Extern(Reset); plProfile_Extern(NumSkin); +plProfile_Extern(PipeReload); +plProfile_Extern(AvRTPoolUsed); +plProfile_Extern(AvRTPoolCount); +plProfile_Extern(AvRTPoolRes); +plProfile_Extern(AvRTShrinkTime); +plProfile_Extern(SpanMerge); +plProfile_Extern(MatLightState); +plProfile_Extern(EmptyList); // Adding a nil RenderPrim for turning off drawing static plRenderNilFunc sRenderNil; @@ -240,7 +251,9 @@ bool plGLPipeline::PrepForRender(plDrawable* drawable, std::vector& vis return false; } - // Other stuff that we're ignoring for now... + // Avatar face sorting happens after the software skin. 
+ if (ice->GetNativeProperty(plDrawable::kPropPartialSort)) + IAvatarSort(ice, visList); plProfile_EndTiming(PrepDrawable); @@ -351,7 +364,7 @@ hsGDeviceRef* plGLPipeline::MakeRenderTargetRef(plRenderTarget* owner) // If we have Shader Model 3 and support non-POT textures, let's make reflections the pipe size if (plDynamicCamMap* camMap = plDynamicCamMap::ConvertNoRef(owner)) { - if (plQuality::GetCapability() > plQuality::kPS_2) + if (camMap->IsReflection() && plQuality::GetCapability() > plQuality::kPS_2) camMap->ResizeViewport(IGetViewTransform()); } @@ -386,13 +399,23 @@ hsGDeviceRef* plGLPipeline::MakeRenderTargetRef(plRenderTarget* owner) // See if it's a cubic render target. // Primary consumer here is the vertex/pixel shader water. if (plCubicRenderTarget* cubicRT = plCubicRenderTarget::ConvertNoRef(owner)) { - /// And create the ref (it'll know how to set all the flags) - //if (ref) - // ref->Set(surfFormat, 0, owner); - //else - // ref = new plGLRenderTargetRef(surfFormat, 0, owner); +#if 0 + if (!ref) + ref = new plGLRenderTargetRef(); + + ref->fOwner = owner; + ref->fDepthBuffer = depthBuffer; + ref->fMapping = GL_TEXTURE_CUBE_MAP; - // TODO: The rest + if (plGLVersion() >= 45) { + glCreateTextures(GL_TEXTURE_CUBE_MAP, 1, &ref->fRef); + // TODO: The rest + } else { + glGenTextures(1, &ref->fRef); + glBindTexture(GL_TEXTURE_CUBE_MAP, ref->fRef); + // TODO: The rest + } +#endif } // Not a cubic, is it a texture render target? These are currently used @@ -446,6 +469,7 @@ hsGDeviceRef* plGLPipeline::MakeRenderTargetRef(plRenderTarget* owner) // Keep it in a linked list for ready destruction. 
if (owner->GetDeviceRef() != ref) { owner->SetDeviceRef(ref); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) hsRefCnt_SafeUnRef(ref); if (ref != nullptr && !ref->IsLinked()) @@ -458,14 +482,16 @@ hsGDeviceRef* plGLPipeline::MakeRenderTargetRef(plRenderTarget* owner) // Mark as not dirty so it doesn't get re-created if (ref != nullptr) ref->SetDirty(false); + else { + /* ref is null on this path, so only report it; calling SetDirty() here would dereference a null pointer. */ + hsStatusMessage("Got an unfilled render target!"); + } return ref; } bool plGLPipeline::BeginRender() { - // TODO: Device Init/Reset stuff here - // offset transform RefreshScreenMatrices(); @@ -475,6 +501,8 @@ bool plGLPipeline::BeginRender() fVtxRefTime++; + IPreprocessAvatarTextures(); + hsColorRGBA clearColor = GetClearColor(); glDepthMask(GL_TRUE); @@ -562,9 +590,46 @@ void plGLPipeline::Resize(uint32_t width, uint32_t height) void plGLPipeline::LoadResources() { - if (plGLPlateManager* pm = static_cast(fPlateMgr)) { + hsStatusMessageF("Begin Device Reload t=%f",hsTimer::GetSeconds()); + plNetClientApp::StaticDebugMsg("Begin Device Reload"); + + if (plGLPlateManager* pm = static_cast(fPlateMgr)) pm->IReleaseGeometry(); + + IReleaseAvRTPool(); + + if (fDevice.fContextType == plGLDevice::kNone) { + // We can't create anything if the OpenGL context isn't initialized + plProfile_IncCount(PipeReload, 1); + + hsStatusMessageF("End Device Reload (but no GL Context) t=%f",hsTimer::GetSeconds()); + plNetClientApp::StaticDebugMsg("End Device Reload (but no GL Context)"); + return; } + + // Create all RenderTargets + plPipeRTMakeMsg* rtMake = new plPipeRTMakeMsg(this); + rtMake->Send(); + + if (plGLPlateManager* pm = static_cast(fPlateMgr)) + pm->ICreateGeometry(); + + plPipeGeoMakeMsg* defMake = new plPipeGeoMakeMsg(this, true); + defMake->Send(); + + IFillAvRTPool(); + + // Force a create of all our static vertex buffers. 
+ plPipeGeoMakeMsg* manMake = new plPipeGeoMakeMsg(this, false); + manMake->Send(); + + // Okay, we've done it, clear the request. + plPipeResReq::Clear(); + + plProfile_IncCount(PipeReload, 1); + + hsStatusMessageF("End Device Reload t=%f",hsTimer::GetSeconds()); + plNetClientApp::StaticDebugMsg("End Device Reload"); } bool plGLPipeline::SetGamma(float eR, float eG, float eB) @@ -611,7 +676,7 @@ void plGLPipeline::RenderSpans(plDrawableSpans* ice, const std::vector& hsGMaterial* material; const std::vector& spans = ice->GetSpanArray(); - //plProfile_IncCount(EmptyList, visList.empty()); + plProfile_IncCount(EmptyList, visList.empty()); /// Set this (*before* we do our TestVisibleWorld stuff...) lastL2W.Reset(); @@ -644,7 +709,7 @@ void plGLPipeline::RenderSpans(plDrawableSpans* ice, const std::vector& break; } plProfile_EndTiming(MergeCheck); - //plProfile_Inc(SpanMerge); + plProfile_Inc(SpanMerge); plProfile_BeginTiming(MergeSpan); spans[visList[j]]->MergeInto(&tempIce); @@ -847,8 +912,8 @@ void plGLPipeline::IRenderBufferSpan(const plIcicle& span, // If the layer opacity is 0, don't draw it. This prevents it from // contributing to the Z buffer. This can happen with some models like // the fire marbles in the neighborhood that have some models for - // physics only, and then can block other rendering in the Z buffer. DX - // pipeline does this in ILoopOverLayers. + // physics only, and then can block other rendering in the Z buffer. + // DX pipeline does this in ILoopOverLayers. 
if ((s.fBlendFlags & hsGMatState::kBlendAlpha) && lay->GetOpacity() <= 0 && fCurrLightingMethod != plSpan::kLiteVtxPreshaded) continue; @@ -1083,7 +1148,7 @@ void plGLPipeline::ISetCullMode() void plGLPipeline::ICalcLighting(plGLMaterialShaderRef* mRef, const plLayerInterface* currLayer, const plSpan* currSpan) { - //plProfile_Inc(MatLightState); + plProfile_Inc(MatLightState); GLint e; @@ -1450,6 +1515,70 @@ void plGLPipeline::IDrawPlate(plPlate* plate) glDrawElements(GL_TRIANGLE_STRIP, 6, GL_UNSIGNED_SHORT, (GLvoid*)(sizeof(uint16_t) * 0)); } +struct plAVTexVert +{ + float fPos[3]; + float fUv[2]; +}; + +void plGLPipeline::IPreprocessAvatarTextures() +{ + plProfile_Set(AvRTPoolUsed, fClothingOutfits.size()); + plProfile_Set(AvRTPoolCount, fAvRTPool.size()); + plProfile_Set(AvRTPoolRes, fAvRTWidth); + plProfile_Set(AvRTShrinkTime, uint32_t(hsTimer::GetSysSeconds() - fAvRTShrinkValidSince)); + + // Frees anyone used last frame that we don't need this frame + IClearClothingOutfits(&fPrevClothingOutfits); + + if (fClothingOutfits.empty()) + return; + + static float kIdentityMatrix[16] = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + }; + + //glUniformMatrix4fv(mRef->uMatrixProj, 1, GL_TRUE, kIdentityMatrix); + //glUniformMatrix4fv(mRef->uMatrixW2C, 1, GL_TRUE, kIdentityMatrix); + //glUniformMatrix4fv(mRef->uMatrixC2W, 1, GL_TRUE, kIdentityMatrix); + //glUniformMatrix4fv(mRef->uMatrixL2W, 1, GL_TRUE, kIdentityMatrix); + + for (size_t oIdx = 0; oIdx < fClothingOutfits.size(); oIdx++) { + plClothingOutfit* co = fClothingOutfits[oIdx]; + if (co->fBase == nullptr || co->fBase->fBaseTexture == nullptr) + continue; + +#if 0 + plRenderTarget* rt = plRenderTarget::ConvertNoRef(co->fTargetLayer->GetTexture()); + if (rt != nullptr && co->fDirtyItems.Empty()) + // we've still got our valid RT from last frame and we have nothing to do. 
+ continue; + + if (rt == nullptr) { + rt = IGetNextAvRT(); + co->fTargetLayer->SetTexture(rt); + } +#endif + + //PushRenderTarget(rt); + + // HACK HACK HACK + co->fTargetLayer->SetTexture(co->fBase->fBaseTexture); + + // TODO: Actually render to the render target + + //PopRenderTarget(); + //co->fDirtyItems.Clear(); + } + + fView.fXformResetFlags = fView.kResetAll; + + fClothingOutfits.swap(fPrevClothingOutfits); +} + bool plGLPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std::vector& visList) { diff --git a/Sources/Plasma/FeatureLib/pfGLPipeline/plGLPipeline.h b/Sources/Plasma/FeatureLib/pfGLPipeline/plGLPipeline.h index f1b79f4d50..bbae38983a 100644 --- a/Sources/Plasma/FeatureLib/pfGLPipeline/plGLPipeline.h +++ b/Sources/Plasma/FeatureLib/pfGLPipeline/plGLPipeline.h @@ -147,6 +147,7 @@ class plGLPipeline : public pl3DPipeline void IDisableLight(plGLMaterialShaderRef* mRef, size_t i); void IScaleLight(plGLMaterialShaderRef* mRef, size_t i, float scale); void IDrawPlate(plPlate* plate); + void IPreprocessAvatarTextures(); /** * Emulate matrix palette operations in software. 
diff --git a/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.cpp b/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.cpp index 14d4297969..dc7372b41e 100644 --- a/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.cpp +++ b/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.cpp @@ -83,6 +83,16 @@ plProfile_CreateCounter("LightActive", "PipeC", LightActive); plProfile_CreateCounter("Lights Found", "PipeC", FindLightsFound); plProfile_CreateCounter("Perms Found", "PipeC", FindLightsPerm); plProfile_CreateCounter("NumSkin", "PipeC", NumSkin); +plProfile_CreateCounter("AvRTPoolUsed", "PipeC", AvRTPoolUsed); +plProfile_CreateCounter("AvRTPoolCount", "PipeC", AvRTPoolCount); +plProfile_CreateCounter("AvRTPoolRes", "PipeC", AvRTPoolRes); +plProfile_CreateCounter("AvRTShrinkTime", "PipeC", AvRTShrinkTime); +plProfile_CreateCounter("AvatarFaces", "PipeC", AvatarFaces); +plProfile_CreateCounter("Merge", "PipeC", SpanMerge); +plProfile_CreateCounter("LiState", "PipeC", MatLightState); +plProfile_CreateCounter("EmptyList", "PipeC", EmptyList); + +plProfile_CreateCounterNoReset("Reload", "PipeC", PipeReload); plProfile_CreateCounter("Polys", "General", DrawTriangles); plProfile_CreateCounter("Material Change", "Draw", MatChange); diff --git a/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h b/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h index 1bedda7ebf..8c1381616f 100644 --- a/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h +++ b/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h @@ -99,6 +99,8 @@ plProfile_Extern(LightChar); plProfile_Extern(LightActive); plProfile_Extern(FindLightsFound); plProfile_Extern(FindLightsPerm); +plProfile_Extern(AvatarSort); +plProfile_Extern(AvatarFaces); static const float kPerspLayerScale = 0.00001f; static const float kPerspLayerScaleW = 0.001f; @@ -938,6 +940,32 @@ class pl3DPipeline : public plPipeline /** pass the current local to world tranform on to the device. 
*/ void ILocalToWorldToDevice(); + + /** + * Sorts the avatar geometry for display. + * + * We handle avatar sort differently from the rest of the face sort. The + * reason is that within the single avatar index buffer, we want to only + * sort the faces of spans requesting a sort, and sort them in place. + * + * Contrast that with the normal scene translucency sort. There, we sort + * all the spans in a drawable, then we sort all the faces in that drawable, + * then for each span in the sorted span list, we extract the faces for + * that span appending onto the index buffer. This gives great efficiency + * because only the visible faces are sorted and they wind up packed into + * the front of the index buffer, which permits more batching. See + * plDrawableSpans::SortVisibleSpans. + * + * For the avatar, it's generally the case that the avatar is either entirely visible + * or not, and there is only one material, so neither of those efficiencies + * is helpful. Moreover, for the avatar the faces we want sorted are a tiny + * subset of the avatar's faces. Moreover, and most importantly, for the + * avatar, we want to preserve the order that spans are drawn, so, for + * example, the opaque base head will always be drawn before the + * translucent hair fringe, which will always be drawn before the pink + * clear plastic baseball cap. 
+ */ + bool IAvatarSort(plDrawableSpans* d, const std::vector<int16_t>& visList); }; @@ -1025,6 +1053,7 @@ pl3DPipeline::~pl3DPipeline() while (fActiveLights) UnRegisterLight(fActiveLights); + IReleaseAvRTPool(); IClearClothingOutfits(&fClothingOutfits); IClearClothingOutfits(&fPrevClothingOutfits); } @@ -2171,4 +2200,136 @@ void pl3DPipeline::ILocalToWorldToDevice() fView.fXformResetFlags &= ~fView.kResetL2W; } +struct plSortFace +{ + uint16_t fIdx[3]; + float fDist; +}; + +struct plCompSortFace +{ + bool operator()( const plSortFace& lhs, const plSortFace& rhs) const + { + return lhs.fDist > rhs.fDist; + } +}; + +template<class DeviceType> +bool pl3DPipeline<DeviceType>::IAvatarSort(plDrawableSpans* d, const std::vector<int16_t>& visList) +{ + plProfile_BeginTiming(AvatarSort); + for (int16_t visIdx : visList) + { + hsAssert(d->GetSpan(visIdx)->fTypeMask & plSpan::kIcicleSpan, "Unknown type for sorting faces"); + + plIcicle* span = (plIcicle*)d->GetSpan(visIdx); + + if (span->fProps & plSpan::kPartialSort) { + hsAssert(d->GetBufferGroup(span->fGroupIdx)->AreIdxVolatile(), "Badly setup buffer group - set PartialSort too late?"); + + const hsPoint3 viewPos = GetViewPositionWorld(); + + plGBufferGroup* group = d->GetBufferGroup(span->fGroupIdx); + + typename DeviceType::VertexBufferRef* vRef = static_cast<typename DeviceType::VertexBufferRef*>(group->GetVertexBufferRef(span->fVBufferIdx)); + + const uint8_t* vdata = vRef->fData; + const uint32_t stride = vRef->fVertexSize; + + const int numTris = span->fILength/3; + + static std::vector<plSortFace> sortScratch; + sortScratch.resize(numTris); + + plProfile_IncCount(AvatarFaces, numTris); + + // Have three very similar sorts here, differing only on where the "position" of + // each triangle is defined, either as the center of the triangle, the nearest + // point on the triangle, or the farthest point on the triangle. + // Having tried all three on the avatar (the only thing this sort is used on), + // the best results surprisingly came from using the center of the triangle. 
+ uint16_t* indices = group->GetIndexBufferData(span->fIBufferIdx) + span->fIStartIdx; + int j; + for( j = 0; j < numTris; j++ ) + { +#if 1 // TRICENTER + uint16_t idx = *indices++; + sortScratch[j].fIdx[0] = idx; + hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride); + + idx = *indices++; + sortScratch[j].fIdx[1] = idx; + pos += *(hsPoint3*)(vdata + idx * stride); + + idx = *indices++; + sortScratch[j].fIdx[2] = idx; + pos += *(hsPoint3*)(vdata + idx * stride); + + pos *= 0.3333f; + + sortScratch[j].fDist = hsVector3(&pos, &viewPos).MagnitudeSquared(); +#elif 0 // NEAREST + uint16_t idx = *indices++; + sortScratch[j].fIdx[0] = idx; + hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride); + float dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); + float minDist = dist; + + idx = *indices++; + sortScratch[j].fIdx[1] = idx; + pos = *(hsPoint3*)(vdata + idx * stride); + dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); + if( dist < minDist ) + minDist = dist; + + idx = *indices++; + sortScratch[j].fIdx[2] = idx; + pos = *(hsPoint3*)(vdata + idx * stride); + dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); + if( dist < minDist ) + minDist = dist; + + sortScratch[j].fDist = minDist; +#elif 1 // FURTHEST + uint16_t idx = *indices++; + sortScratch[j].fIdx[0] = idx; + hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride); + float dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); + float maxDist = dist; + + idx = *indices++; + sortScratch[j].fIdx[1] = idx; + pos = *(hsPoint3*)(vdata + idx * stride); + dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); + if( dist > maxDist ) + maxDist = dist; + + idx = *indices++; + sortScratch[j].fIdx[2] = idx; + pos = *(hsPoint3*)(vdata + idx * stride); + dist = hsVector3(&pos, &viewPos).MagnitudeSquared(); + if( dist > maxDist ) + maxDist = dist; + + sortScratch[j].fDist = maxDist; +#endif // SORTTYPES + } + + std::sort(sortScratch.begin(), sortScratch.end(), plCompSortFace()); + + indices = 
group->GetIndexBufferData(span->fIBufferIdx) + span->fIStartIdx; + for (const plSortFace& iter : sortScratch) + { + *indices++ = iter.fIdx[0]; + *indices++ = iter.fIdx[1]; + *indices++ = iter.fIdx[2]; + } + + group->DirtyIndexBuffer(span->fIBufferIdx); + } + } + plProfile_EndTiming(AvatarSort); + return true; +} + #endif //_pl3DPipeline_inc_