junjunnさんの描画高速化取り込み
遅くなりましたが junjunn さんの描画高速化の成果を trunk/mona に取り込みました。
- memset高速化
- memcpy高速化
- 計算量の削減
などの方法がとられています。素晴らしい。
以下 diff です。
Index: string.cpp
===================================================================
--- string.cpp (リビジョン 4022)
+++ string.cpp (リビジョン 4023)
@@ -66,6 +66,7 @@
 \author HigePon
 \date create:2002/12/15 update:
 */
+#if 0 // original
 void *memset(void* buf, int value, size_t size) {
 char *p = (char*)buf;
@@ -77,7 +78,28 @@
 }
 return buf;
 }
+#else // junjunn
+inline void divide(int iNumerator,int iDenominator,uint32_t* piQuotient,uint32_t* piRemainder)
+{
+ *piQuotient = iNumerator / iDenominator;
+ *piRemainder = iNumerator - iDenominator * *piQuotient;
+}
+void *memset(void* pTo,int iValue,size_t nCount)
+{
+ uint32_t* pdwTo=(uint32_t*)pTo;
+ uint8_t byValue=iValue;
+ uint32_t dwValue4 = byValue + (byValue<<8) + (byValue<<16) + (byValue<<24);
+
+ uint32_t i4ByteCount,i1ByteCount;
+ divide(nCount,4,&i4ByteCount,&i1ByteCount);
+
+ uint32_t n;
+ for(n=0;n<i4ByteCount;n++) {*pdwTo++ = dwValue4;}
+ uint8_t* pcTo=(uint8_t*)pdwTo;
+ for(n=0;n<i1ByteCount;n++) {*pcTo++ = byValue;}
+}
+#endif
 /*!
 \brief strlen
@@ -142,19 +164,34 @@
 return (unsigned char)*str1 - (unsigned char)*str2;
 }
-void* memcpy(void* s1, void* s2, size_t size) {
+#if 1 // junjunn
+void* memcpy(void* pTo,void* cpFrom,size_t nCount)
+{
+ uint32_t i4ByteCount,i1ByteCount;
+ divide(nCount,4,&i4ByteCount,&i1ByteCount);
+
+//4バイト転送部分
+ asm volatile("movl %0, %%edi \n"
+ "movl %1, %%esi \n"
+ "movl %2, %%ecx \n"
+ "cld \n"
+ "rep movsd \n"
+ :
+ : "m"(pTo), "m"(cpFrom), "m"(i4ByteCount)
+ : "edi", "esi", "ecx");
+//1バイト転送部分
+ asm volatile("movl %0, %%ecx \n"
+ "rep movsb \n"
+ :
+ : "m"(i1ByteCount)
+ : "ecx");
+ return pTo;
+}
+#else
+void* memcpy(void* s1, void* s2, size_t size)
+{
 asm volatile("movl %0, %%edi \n"
 "movl %1, %%esi \n"
 "movl %2, %%ecx \n"
@@ -167,6 +204,8 @@
 return s1;
 }
+#endif
+
 char* strncpy(char* s1, const char* s2, size_t n) {
Index: VesaConsole.h
===================================================================
--- VesaConsole.h (リビジョン 4022)
+++ VesaConsole.h (リビジョン 4023)
@@ -87,6 +87,7 @@
 uint32_t vramAddress;
 uint16_t uint8_tsPerScanLine;
 uint16_t bitsPerPixel;
+ uint16_t bytesPerPixel;
 };
 VesaScreen screen;
Index: VesaConsole.cpp
===================================================================
--- VesaConsole.cpp (リビジョン 4022)
+++ VesaConsole.cpp (リビジョン 4023)
@@ -260,6 +260,7 @@
 vramAddress = info->physBasePtr;
 uint8_tsPerScanLine = info->uint8_tsPerScanLine;
 bitsPerPixel = info->bitsPerPixel;
+ bytesPerPixel = bitsPerPixel / 8;
 selectMethod(info);
 }
@@ -333,63 +334,18 @@
 }
 #else // enough speed at -O3
- memset((void*)vramAddress, 0xff, w * h * (bitsPerPixel / 8));
+ memset((void*)vramAddress, 0xff, w * h * bytesPerPixel);
 #endif
 }
 void VesaConsole::VesaScreen::clearScreenBlack(int w, int h) {
- memset((void*)vramAddress, 0x00, w * h * (bitsPerPixel / 8));
+ memset((void*)vramAddress, 0x00, w * h * bytesPerPixel);
 }