From bd33eceb293bef4222613e1c828f17d8aa55283d Mon Sep 17 00:00:00 2001 From: Daniel Dragan Date: Tue, 19 Aug 2025 23:56:29 -0400 Subject: [PATCH] win32/vmem.h change emulated calloc() to OS/CRT's native calloc() Most OSes/libcs have an optimization that calloc() sometimes, or most of the time, do not call memset() in userland wasting CPU to zeroize brand new memory blocks/pages obtained fresh from the kernel. The larger the calloc() allocation is, the higher chance the memory blocks will be obtained fresh from the kernel. MS CRT's calloc() is a wrapper function that is either thin or heavy (personal opinions), and ultimatly forwards to HeapAlloc(hSecretUBHandle,HEAP_ZERO_MEMORY,size). Whether HeapAlloc@Kernel32.dll has or doesn't have the don't memset(,0,); fresh kernel pages optimization, this author doesn't know and it is irrelevant. WinPerl did its part, to take advantage of the optimization if it exists inside Microsoft's closed source OS. Historically perlhost.h/vmem.h was perl5xx.dll emulating calloc() because this area of the interp is "unfinished business" from the late 1990s where Win95 and "Win32s Runtime" on Win 3.11 WFW OS compatiblity was critical for WinPerl. WinNT Kernel Win OSes have always been POSIX-like or actually Unix SVR1 1983 compatible from the start (and remained compatible with POSIX/SVR1 1983 until WSL 1). The alternate never used memory allocator in vmem.h doesn't have a Calloc() method, so the nextgen and current "native kernel32.dll malloc()" code couldn't implement a Calloc() method. The DIY malloc() impl doesn't have a Calloc() because in 1993-1997-ish, VirtualAlloc, VirtualProtect, VirtualFree, couldn't be used in WinPerl for some reason lost to time. This author's Win95 Kernel32.dll file exports all 3 functions and they are not stubs that only do "return STATUS_NOT_IMPLEMENTED;". do_crt_invalid_parameter() was added so the DIY allocator behaves like the native MS CRT calloc() behaves. perlhost.h's design concept is that the library can be copy pasted without modification to the PHP and Python interps, something like that. Therefore perlhost.h and vmem.h aren't allowed to be aware of the Perl C API. So no croak()/die()/die_noperl(). -split off the very cold "Free to wrong pool" panic branch into its own function. Less "dead" machine code for the CPU to skip around in the perf critical VMemNL::Free() call. VC 2022 -O1 LTO inlined the DispatchWrongPool() method against our wishes, so override VC 2022's and GCC's inline criteria. We do not want inlining here. -move 2 of void* writes out of the CS lock inside PerlMemSharedMalloc() PerlMemMalloc and PerlMemParseMalloc and the Calloc()s, they are writes of constants to a new mem block and not reads/writes to the head (VMem*) object, or the first block hanging off the VMem* LL, so its not needed to muxtex lock those 2 writes -m_lRefCount assignment in VMem::VMem so CC doesn't need to save var this around fn call InitializeCriticalSection in this function -change return NULL; to return ptr; better codegen on MSVC 2022, since optmizer doesnt realize var ptr is a free 0x0 value after false test and instead emits xor RAX, RAX; -reorder the VMem struct so VMemNL m_VMem (the per-my_perl pool) is at the the front --- iperlsys.h | 6 ++ win32/perlhost.h | 47 +++++-------- win32/vmem.h | 179 +++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 190 insertions(+), 42 deletions(-) diff --git a/iperlsys.h b/iperlsys.h index 40b3c19908d1..7f65b6421e1c 100644 --- a/iperlsys.h +++ b/iperlsys.h @@ -800,6 +800,12 @@ struct IPerlLIOInfo Interface for perl memory allocation */ +/* let CPAN conditionally know if these brand new macros are available: + PerlMem_calloc PerlMemShared_calloc PerlMemParse_calloc + note, we must always define this macro, regardless if the build config + is using these vtables, or this file NOOPs itself to the OS's libc */ +#define PERL_IMPLICIT_SYS_HAS_CALLOC 1 + #if defined(PERL_IMPLICIT_SYS) /* IPerlMem */ diff --git a/win32/perlhost.h b/win32/perlhost.h index af5e320afae4..45616d0348c9 100644 --- a/win32/perlhost.h +++ b/win32/perlhost.h @@ -66,14 +66,7 @@ class CPerlHost inline void* Malloc(size_t size) { return m_VMem.Malloc(size); }; inline void* Realloc(void* ptr, size_t size) { return m_VMem.Realloc(ptr, size); }; inline void Free(void* ptr) { m_VMem.Free(ptr); }; - inline void* Calloc(size_t num, size_t size) - { - size_t count = num*size; - void* lpVoid = Malloc(count); - if (lpVoid) - lpVoid = memset(lpVoid, 0, count); - return lpVoid; - }; + inline void* Calloc(size_t num, size_t size) { return m_VMem.Calloc(num, size); } inline void GetLock(void) { m_VMem.GetLock(); }; inline void FreeLock(void) { m_VMem.FreeLock(); }; inline int IsLocked(void) { return m_VMem.IsLocked(); }; @@ -107,11 +100,11 @@ class CPerlHost }; inline void* CallocShared(size_t num, size_t size) { - size_t count = num*size; - void* lpVoid = MallocShared(count); - if (lpVoid) - lpVoid = memset(lpVoid, 0, count); - return lpVoid; + void *result; + GetLockShared(); + result = m_pVMemShared->Calloc(num, size); + FreeLockShared(); + return result; }; /* IPerlMemParse */ @@ -124,14 +117,7 @@ class CPerlHost inline void* MallocParse(size_t size) { return m_pVMemParse->Malloc(size); }; inline void* ReallocParse(void* ptr, size_t size) { return m_pVMemParse->Realloc(ptr, size); }; inline void FreeParse(void* ptr) { m_pVMemParse->Free(ptr); }; - inline void* CallocParse(size_t num, size_t size) - { - size_t count = num*size; - void* lpVoid = MallocParse(count); - if (lpVoid) - lpVoid = memset(lpVoid, 0, count); - return lpVoid; - }; + inline void* CallocParse(size_t num, size_t size){ return m_pVMemParse->Calloc(num, size); }; /* IPerlEnv */ char *Getenv(const char *varname); @@ -188,6 +174,18 @@ class CPerlHost inline VMem* GetMemParse(void) { m_pVMemParse->AddRef(); return m_pVMemParse; }; inline VDir* GetDir(void) { return &m_vDir; }; +public: + + inline char* MapPathA(const char *pInName) { return m_vDir.MapPathA(pInName); }; + inline WCHAR* MapPathW(const WCHAR *pInName) { return m_vDir.MapPathW(pInName); }; + inline operator VDir* () { return GetDir(); }; + +protected: + + VMemNL m_VMem; /* make this 1st member of CPerlHost* struct, highest use */ + VMem* m_pVMemShared; + VMem* m_pVMemParse; + public: const struct IPerlMem* m_pHostperlMem; @@ -200,14 +198,7 @@ class CPerlHost const struct IPerlSock* m_pHostperlSock; const struct IPerlProc* m_pHostperlProc; - inline char* MapPathA(const char *pInName) { return m_vDir.MapPathA(pInName); }; - inline WCHAR* MapPathW(const WCHAR *pInName) { return m_vDir.MapPathW(pInName); }; - inline operator VDir* () { return GetDir(); }; protected: - VMemNL m_VMem; - VMem* m_pVMemShared; - VMem* m_pVMemParse; - LPSTR* m_lppEnvList; DWORD m_dwEnvCount; BOOL m_bTopLevel; // is this a toplevel host? diff --git a/win32/vmem.h b/win32/vmem.h index 3d5492958756..4607cb28b77a 100644 --- a/win32/vmem.h +++ b/win32/vmem.h @@ -27,6 +27,9 @@ // #define _USE_BUDDY_BLOCKS // #define _DEBUG_MEM + +static void * do_crt_invalid_parameter(void); + #ifdef _DEBUG_MEM #define ASSERT(f) if(!(f)) DebugBreak(); @@ -98,6 +101,14 @@ inline void MEMODSlx(char *str, long x) */ #ifdef _USE_LINKED_LIST + +# if defined(__GNUC__) +# define VMEM_FORCE_NOINLINE __attribute__((__noinline__)) +# elif defined(_MSC_VER) +# define VMEM_FORCE_NOINLINE __declspec(noinline) +# else +# error "Unknown C compiler family type" +#endif class VMemNL; /* NL = no locks */ class VMem; @@ -140,6 +151,7 @@ class VMemNL void* Malloc(size_t size); void* Realloc(void* pMem, size_t size); void Free(void* pMem); + void* Calloc(size_t num, size_t size); void GetLock(void); void FreeLock(void); int IsLocked(void); @@ -151,15 +163,26 @@ class VMemNL return TRUE; }; +#ifdef _USE_LINKED_LIST + VMEM_FORCE_NOINLINE void* DispatchWrongPool(PMEMORY_BLOCK_HEADER ptr); + /* Retval is NULL. Encourages CC to see a better ABI match, and maybe do + a tailcall in ::Realloc(), which would be the same as a real __noreturn + decl or ~0-4 CPU ops bigger. */ +#endif + protected: #ifdef _USE_LINKED_LIST + /* prep work that can be done outside of the CS lock */ + void PrepLinkBlock(PMEMORY_BLOCK_HEADER ptr) + { /* these 2 of 3 ptrs are psuedo-const addrs into our VMem* obj */ + ptr->pPrev = &m_Dummy; /* LL termination sentinal */ + ptr->u.owner_nl = this; + } void LinkBlock(PMEMORY_BLOCK_HEADER ptr) { PMEMORY_BLOCK_HEADER next = m_Dummy.pNext; m_Dummy.pNext = ptr; - ptr->pPrev = &m_Dummy; ptr->pNext = next; - ptr->u.owner_nl = this; next->pPrev = ptr; } void UnlinkBlock(PMEMORY_BLOCK_HEADER ptr) @@ -182,6 +205,9 @@ class VMem : public VMemNL { CRITICAL_SECTION m_cs; // access lock #endif volatile long m_lRefCount; // number of current users +#ifdef _USE_LINKED_LIST + VMEM_FORCE_NOINLINE void* DispatchWrongPool(PMEMORY_BLOCK_HEADER ptr); +#endif public: VMem(); @@ -190,6 +216,7 @@ class VMem : public VMemNL { void* Malloc(size_t size); void* Realloc(void* pMem, size_t size); void Free(void* pMem); + void* Calloc(size_t num, size_t size); void GetLock(void); void FreeLock(void); inline int IsLocked(void); @@ -200,6 +227,9 @@ class VMem : public VMemNL { VMemNL::VMemNL(void) { #ifdef _USE_LINKED_LIST +/* addr &m_Dummy happens to be (void*)&m_Dummy == (void*)(VMem*)this + the offset of member m_Dummy inside struct VMem {} is 0x00, and therefore + no U8 offset byte is present in machine code. */ m_Dummy.pNext = m_Dummy.pPrev = &m_Dummy; m_Dummy.u.owner_nl = this; #endif @@ -208,10 +238,10 @@ VMemNL::VMemNL(void) VMem::VMem(void) { + m_lRefCount = 1; #ifdef _USE_LINKED_LIST InitializeCriticalSection(&m_cs); #endif _USE_LINKED_LIST - m_lRefCount = 1; return; } @@ -252,8 +282,9 @@ void* VMemNL::Malloc(size_t size) PMEMORY_BLOCK_HEADER ptr = (PMEMORY_BLOCK_HEADER)malloc(size+sizeof(MEMORY_BLOCK_HEADER)); if (!ptr) { - return NULL; + return ptr; /* NULL */ } + PrepLinkBlock(ptr); GetLock(); LinkBlock(ptr); FreeLock(); @@ -282,6 +313,7 @@ void* VMemNL::Realloc(void* pMem, size_t size) FreeLock(); return NULL; } + PrepLinkBlock(ptr); LinkBlock(ptr); FreeLock(); @@ -297,16 +329,7 @@ void VMemNL::Free(void* pMem) if (pMem) { PMEMORY_BLOCK_HEADER ptr = (PMEMORY_BLOCK_HEADER)(((char*)pMem)-sizeof(MEMORY_BLOCK_HEADER)); if (ptr->u.owner_nl != this) { - if (ptr->u.owner_nl) { -#if 1 - int *nowhere = NULL; - Perl_warn_nocontext("Free to wrong pool %p not %p",this,ptr->u.owner_nl); - *nowhere = 0; /* this segfault is deliberate, - so you can see the stack trace */ -#else - ptr->u.owner_nl->Free(pMem); -#endif - } + DispatchWrongPool(ptr); return; } GetLock(); @@ -331,6 +354,47 @@ Win32 fixes-vmem.h hack to handle free-by-wrong-thread after eval "". #endif } +void* VMemNL::Calloc(size_t num, size_t size) +{ +#ifdef _USE_LINKED_LIST + PMEMORY_BLOCK_HEADER ptr; + size_t totalsize = num * size; + if (totalsize == 0) /* UCRT converts 0*0 to 1, and passed 1 to HeapAlloc */ + ptr = (PMEMORY_BLOCK_HEADER)calloc(1, sizeof(MEMORY_BLOCK_HEADER)); + else if (!((((size_t)0)-(0x20+sizeof(MEMORY_BLOCK_HEADER))) / num >= size)) + return do_crt_invalid_parameter(); + else + ptr = (PMEMORY_BLOCK_HEADER)calloc(1,totalsize+sizeof(MEMORY_BLOCK_HEADER)); + if (ptr == NULL) + return ptr; + PrepLinkBlock(ptr); + GetLock(); + LinkBlock(ptr); + FreeLock(); + return (ptr+1); +#else + return calloc(num, size); +#endif +} + +#ifdef _USE_LINKED_LIST +void* VMemNL::DispatchWrongPool(PMEMORY_BLOCK_HEADER ptr) +{ + if (ptr->u.owner_nl) { +#if 1 + int *nowhere = NULL; + Perl_warn_nocontext("Free to wrong pool %p not %p",this,ptr->u.owner_nl); + *nowhere = 0; /* this segfault is deliberate, + so you can see the stack trace */ +#else + void *pMem = (ptr+1); /* recreate the ptr interp was using */ + ptr->u.owner_nl->Free(pMem); +#endif + } + return NULL; +} +#endif /*_USE_LINKED_LIST*/ + #endif #undef VMemNL @@ -556,6 +620,7 @@ class VMem void* Malloc(size_t size); void* Realloc(void* pMem, size_t size); void Free(void* pMem); + void* Calloc(size_t num, size_t size); void GetLock(void); void FreeLock(void); inline int IsLocked(void); @@ -735,6 +800,26 @@ void VMem::Init(void) m_lAllocSize = lAllocStart; } +void* Calloc(size_t num, size_t size) +{ + void * ptr; + size_t totalsize = num * size; + if (totalsize == 0) { /* UCRT converts 0*0 to 1 */ + char * pv = Malloc(1); /* and passes 1 to HeapAlloc */ + if (pv) + pv[0] = 0xFE; /* don't '\0' it, instead poison it (WinPerl invented) */ + ptr = (void*)pv; /* ask for 0 bytes? you get 0 bytes! no '\0' for you */ + } /* this overflow check is supposedly the same one a real MS CRT uses */ + else if (!(( ((size_t)0) - 0x20) / num >= size)) + return do_crt_invalid_parameter(); + else { + ptr = Malloc(totalsize); + if (ptr) + ptr = memset(ptr, 0, totalsize); + } + return ptr; +} + void* VMem::Malloc(size_t size) { WALKHEAP(); @@ -1398,6 +1483,72 @@ void VMem::WalkHeap(int complete) #endif /* _USE_MSVCRT_MEM_ALLOC */ + +#ifndef STATUS_DLL_NOT_FOUND +# define STATUS_DLL_NOT_FOUND 0xC0000135 +#endif + +#ifndef STATUS_PROCEDURE_NOT_FOUND +# define STATUS_PROCEDURE_NOT_FOUND 0xC000007A +#endif + +typedef void(__cdecl * inv_arg_t)(void); +typedef void(__cdecl * inv_arg_noinfo_t)(void); + +/* Emulate CRT's UI behavior upon failure. In real world testing on various + MS CRT versions & ages _invalid_parameter() and _invalid_parameter_noinfo() + are no_return'es/SEGV'es but maybe the very rare "checked" or debug MS CRTs + allow resuming execution. */ + +static void * +do_crt_invalid_parameter(void) +{ + static inv_arg_t g_inv_arg = NULL; + static inv_arg_noinfo_t g_inv_arg_noinfo = NULL; + inv_arg_t inv_arg; + + errno = ENOMEM; + inv_arg = g_inv_arg; + if (!inv_arg) { + inv_arg_noinfo_t inv_arg_noinfo = g_inv_arg_noinfo; + if (!inv_arg_noinfo) { + char ** ppv = _sys_errlist; /* get a ptr into the CRT's .rdata */ + HMODULE h; + BOOL r = GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, (LPCSTR)ppv, &h); + if (r) { + inv_arg = (inv_arg_t)GetProcAddress(h, "_invalid_parameter"); + if (!inv_arg) { + inv_arg_noinfo = + (inv_arg_noinfo_t)GetProcAddress(h, "_invalid_parameter_noinfo"); + if (!inv_arg_noinfo) { + /* 0 = continuable, 0 = no args array ptr in arg 4 */ + RaiseException(STATUS_PROCEDURE_NOT_FOUND, 0, 0, NULL); + } + else { + g_inv_arg_noinfo = inv_arg_noinfo; + goto do_inv_arg_noinfo; + } + } + else { + g_inv_arg = inv_arg; + goto do_inv_arg; + } + } + else /* throw a SEGV-style GUI popup, crash code says its not a SEGV */ + RaiseException(STATUS_DLL_NOT_FOUND, 0, 0, NULL); + } + else { + do_inv_arg_noinfo: + inv_arg_noinfo(); + } + } + else { + do_inv_arg: + inv_arg(); + } + return NULL; +} + #define ___VMEM_H_INC___ #endif /* ___VMEM_H_INC___ */