Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/design/datacontracts/Loader.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ internal enum AvailableMetadataType
ModuleHandle GetModuleHandle(TargetPointer);
TargetPointer GetAssembly(ModuleHandle handle);
ModuleFlags GetFlags(ModuleHandle handle);
string GetPath(ModuleHandle handle);
TargetPointer GetLoaderAllocator(ModuleHandle handle);
TargetPointer GetThunkHeap(ModuleHandle handle);
TargetPointer GetILBase(ModuleHandle handle);
Expand All @@ -122,6 +123,7 @@ Data descriptors used:
| `Module` | `Flags` | Assembly of the Module |
| `Module` | `LoaderAllocator` | LoaderAllocator of the Module |
| `Module` | `ThunkHeap` | Pointer to the thunk heap |
| `Module` | `Path` | Path of the Module (UTF-16, null-terminated) |
| `Module` | `DynamicMetadata` | Pointer to saved metadata for reflection emit modules |
| `Module` | `FieldDefToDescMap` | Mapping table |
| `Module` | `ManifestModuleReferencesMap` | Mapping table |
Expand Down Expand Up @@ -149,6 +151,13 @@ ModuleFlags GetFlags(ModuleHandle handle)
return target.Read<uint>(handle.Address + /* Module::Flags offset */);
}

string GetPath(ModuleHandle handle)
{
TargetPointer pathStart = target.ReadPointer(handle.Address + /* Module::Path offset */);
char[] path = // Read<char> from target starting at pathStart until null terminator
return new string(path);
}

TargetPointer GetLoaderAllocator(ModuleHandle handle)
{
return target.ReadPointer(handle.Address + /* Module::LoaderAllocator offset */);
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/debug/daccess/dacimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,7 @@ class ClrDataAccess
HRESULT GetObjectDataImpl(CLRDATA_ADDRESS addr, struct DacpObjectData *objectData);
HRESULT GetObjectExceptionDataImpl(CLRDATA_ADDRESS objAddr, struct DacpExceptionObjectData *data);
HRESULT GetObjectStringDataImpl(CLRDATA_ADDRESS obj, unsigned int count, _Inout_updates_z_(count) WCHAR *stringData, unsigned int *pNeeded);
HRESULT GetPEFileNameImpl(CLRDATA_ADDRESS moduleAddr, unsigned int count, _Inout_updates_z_(count) WCHAR *fileName, unsigned int *pNeeded);
HRESULT GetUsefulGlobalsImpl(struct DacpUsefulGlobalsData *globalsData);
HRESULT GetMethodDescDataImpl(CLRDATA_ADDRESS methodDesc, CLRDATA_ADDRESS ip, struct DacpMethodDescData *data, ULONG cRevertedRejitVersions, DacpReJitData * rgRevertedRejitData, ULONG * pcNeededRevertedRejitData);
HRESULT GetMethodDescNameImpl(CLRDATA_ADDRESS methodDesc, unsigned int count, _Inout_updates_z_(count) WCHAR *name, unsigned int *pNeeded);
Expand Down
42 changes: 38 additions & 4 deletions src/coreclr/debug/daccess/request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2542,18 +2542,53 @@ ClrDataAccess::GetPEFileName(CLRDATA_ADDRESS moduleAddr, unsigned int count, _In
return E_INVALIDARG;

SOSDacEnter();

if (m_cdacSos != NULL)
{
hr = m_cdacSos->GetPEFileName(moduleAddr, count, fileName, pNeeded);
if (FAILED(hr))
{
hr = GetPEFileNameImpl(moduleAddr, count, fileName, pNeeded);
}
#ifdef _DEBUG
else
{
NewArrayHolder<WCHAR> fileNameLocal(new WCHAR[count]);
unsigned int neededLocal = 0;
HRESULT hrLocal = GetPEFileNameImpl(moduleAddr, count, fileNameLocal, &neededLocal);

DacAssertsEnabledHolder assertsEnabled;
_ASSERTE(hr == hrLocal);
_ASSERTE(pNeeded == NULL || *pNeeded == neededLocal);
_ASSERTE(fileName == NULL || u16_strncmp(fileName, fileNameLocal, count) == 0);
}
#endif
}
else
{
hr = GetPEFileNameImpl(moduleAddr, count, fileName, pNeeded);;
}


SOSDacLeave();
return hr;
}

HRESULT
ClrDataAccess::GetPEFileNameImpl(CLRDATA_ADDRESS moduleAddr, unsigned int count, _Inout_updates_z_(count) WCHAR *fileName, unsigned int *pNeeded)
{
PTR_Module pModule = PTR_Module(TO_TADDR(moduleAddr));
PEAssembly* pPEAssembly = pModule->GetPEAssembly();

// Turn from bytes to wide characters
if (!pPEAssembly->GetPath().IsEmpty())
{
if (!pPEAssembly->GetPath().DacGetUnicode(count, fileName, pNeeded))
hr = E_FAIL;
return E_FAIL;
}
else if (!pPEAssembly->IsReflectionEmit())
{
hr = E_NOTIMPL;
return E_NOTIMPL;
}
else
{
Expand All @@ -2564,8 +2599,7 @@ ClrDataAccess::GetPEFileName(CLRDATA_ADDRESS moduleAddr, unsigned int count, _In
*pNeeded = 1;
}

SOSDacLeave();
return hr;
return S_OK;
}

HRESULT
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/debug/runtimeinfo/datadescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ CDAC_TYPE_FIELD(Module, /*pointer*/, Flags, cdac_data<Module>::Flags)
CDAC_TYPE_FIELD(Module, /*pointer*/, LoaderAllocator, cdac_data<Module>::LoaderAllocator)
CDAC_TYPE_FIELD(Module, /*pointer*/, ThunkHeap, cdac_data<Module>::ThunkHeap)
CDAC_TYPE_FIELD(Module, /*pointer*/, DynamicMetadata, cdac_data<Module>::DynamicMetadata)
CDAC_TYPE_FIELD(Module, /*pointer*/, Path, cdac_data<Module>::Path)

CDAC_TYPE_FIELD(Module, /*pointer*/, FieldDefToDescMap, cdac_data<Module>::FieldDefToDescMap)
CDAC_TYPE_FIELD(Module, /*pointer*/, ManifestModuleReferencesMap, cdac_data<Module>::ManifestModuleReferencesMap)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/vm/ceeload.h
Original file line number Diff line number Diff line change
Expand Up @@ -1639,6 +1639,7 @@ struct cdac_data<Module>
static constexpr size_t LoaderAllocator = offsetof(Module, m_loaderAllocator);
static constexpr size_t ThunkHeap = offsetof(Module, m_pThunkHeap);
static constexpr size_t DynamicMetadata = offsetof(Module, m_pDynamicMetadata);
static constexpr size_t Path = offsetof(Module, m_path);

// Lookup map pointers
static constexpr size_t FieldDefToDescMap = offsetof(Module, m_FieldDefToDescMap);
Expand Down
4 changes: 2 additions & 2 deletions src/native/managed/cdacreader/src/Contracts/Loader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ static IContract IContract.Create(Target target, int version)

public virtual TargetPointer GetAssembly(ModuleHandle handle) => throw new NotImplementedException();
public virtual ModuleFlags GetFlags(ModuleHandle handle) => throw new NotImplementedException();
public virtual string GetPath(ModuleHandle handle) => throw new NotImplementedException();

public virtual TargetPointer GetLoaderAllocator(ModuleHandle handle) => throw new NotImplementedException();
public virtual TargetPointer GetThunkHeap(ModuleHandle handle) => throw new NotImplementedException();

Expand All @@ -131,8 +133,6 @@ static IContract IContract.Create(Target target, int version)
public virtual TargetEcmaMetadata GetReadWriteMetadata(ModuleHandle handle) => throw new NotImplementedException();

public virtual ModuleLookupTables GetLookupTables(ModuleHandle handle) => throw new NotImplementedException();

public virtual string GetPath(ModuleHandle handle) => throw new NotImplementedException();
}

internal readonly struct Loader : ILoader
Expand Down
20 changes: 20 additions & 0 deletions src/native/managed/cdacreader/src/Contracts/Loader_1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;

namespace Microsoft.Diagnostics.DataContractReader.Contracts;

Expand Down Expand Up @@ -35,6 +36,25 @@ ModuleFlags ILoader.GetFlags(ModuleHandle handle)
return (ModuleFlags)module.Flags;
}

string ILoader.GetPath(ModuleHandle handle)
{
Data.Module module = _target.ProcessedData.GetOrAdd<Data.Module>(handle.Address);
TargetPointer addr = module.Path;
List<char> name = [];
while (true)
{
// Read characters until we find the null terminator
char nameChar = _target.Read<char>(addr);
if (nameChar == 0)
break;

name.Add(nameChar);
addr += sizeof(char);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would consider doing this slightly different. I would instead determine the entire length, find the null terminator and then reread the entire block of bytes in a single operation. I think it would be slightly easier to debug. I also don't know if endianness is important here. Metadata is always encoded as UTF-16LE, but I'm not sure about non-metadata allocated strings.

Copy link
Member

@davidwrighton davidwrighton Aug 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non-metadata allocated strings would be big endian on a big endian machine

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this implementation is ... fine... but it would be better if we added an api to Target to read null terminated strings of both 16bit and 8 bit char types in target endianness. This is a thing which keeps coming up, and we shouldn't be re-implementing the wheel again and again.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non-metadata allocated strings would be big endian on a big endian machine

Would they even be utf-16 at all?

(To be clear, I think this is kind of academic until someone sits down and does a coreclr port to POWER64 and actually makes some decision about what an LPCWSTR even is in that platform version of the CoreCLR PAL. I don't know what a reasonable answer might be. /cc @uweigand @directhex )

Do we want to stash an encoding somewhere in the runtime descriptor?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but it would be better if we added an api to Target to read null terminated strings of both 16bit and 8 bit char types in target endianness.

It is more complicated than that. We need to know if the target pointer is into metadata, which is always LE. I was going to suggest the same thing, but the metadata angle makes it less obvious the correct answer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even with the metadata angle, I think having APIs on Target reading strings in target endianness still makes sense.

We need to know if the target pointer is into metadata, which is always LE.

And then it is up to the caller / consumer of the target pointer to know that it shouldn't be read in target endianness and to read it their own way?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think having APIs on Target reading strings in target endianness still makes sense.

Sure, I agree with this. My point is, it isn't as simple as "read the string". In fact it can become very complicated if the API doesn't know the encoding. The API needs to know the encoding apriori or else it won't be able to detect null properly. Is it the "high order" byte of a UTF-16 or an actual null in UTF-8? The string reading API needs to know. The endianness for UTF-16 is easier, but still can be confusing to the caller. The API said it read UTF-16, but now the caller must consider the source and determine if it is in machine endian form or metadta endian form. Creating three APIs would be needed.

It would need to be something like:

string ReadUTF8String();
string ReadUTF16TargetEndianness();
string ReadUTF16MetadataEndianness();

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think @AaronRobinsonMSFT's apis here are good enough for now. As @lambdageek points out, dealing with actual big endian machines is quite unlikely to happen in the near term, and we can get away with having a "reasonable" model now, and tweak it to match reality if/when such a thing happens. I also don't think this refactor to using a Target level api should happen in this PR, but should be deferred to a follow-up PR which is focused on making the string handling sensible.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was expecting multiple APIs - like what @AaronRobinsonMSFT wrote. I put in a TODO for adding/using Target APIs and will a follow-up change for that.


return new string(CollectionsMarshal.AsSpan(name));
}

TargetPointer ILoader.GetLoaderAllocator(ModuleHandle handle)
{
Data.Module module = _target.ProcessedData.GetOrAdd<Data.Module>(handle.Address);
Expand Down
2 changes: 2 additions & 0 deletions src/native/managed/cdacreader/src/Data/Module.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public Module(Target target, TargetPointer address)
LoaderAllocator = target.ReadPointer(address + (ulong)type.Fields[nameof(LoaderAllocator)].Offset);
ThunkHeap = target.ReadPointer(address + (ulong)type.Fields[nameof(ThunkHeap)].Offset);
DynamicMetadata = target.ReadPointer(address + (ulong)type.Fields[nameof(DynamicMetadata)].Offset);
Path = target.ReadPointer(address + (ulong)type.Fields[nameof(Path)].Offset);

FieldDefToDescMap = target.ReadPointer(address + (ulong)type.Fields[nameof(FieldDefToDescMap)].Offset);
ManifestModuleReferencesMap = target.ReadPointer(address + (ulong)type.Fields[nameof(ManifestModuleReferencesMap)].Offset);
Expand All @@ -39,6 +40,7 @@ public Module(Target target, TargetPointer address)
public TargetPointer LoaderAllocator { get; init; }
public TargetPointer ThunkHeap { get; init; }
public TargetPointer DynamicMetadata { get; init; }
public TargetPointer Path { get; init; }

public TargetPointer FieldDefToDescMap { get; init; }
public TargetPointer ManifestModuleReferencesMap { get; init; }
Expand Down
30 changes: 29 additions & 1 deletion src/native/managed/cdacreader/src/Legacy/SOSDacImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,35 @@ public unsafe int GetObjectStringData(ulong obj, uint count, char* stringData, u
public unsafe int GetOOMData(ulong oomAddr, void* data) => HResults.E_NOTIMPL;
public unsafe int GetOOMStaticData(void* data) => HResults.E_NOTIMPL;
public unsafe int GetPEFileBase(ulong addr, ulong* peBase) => HResults.E_NOTIMPL;
public unsafe int GetPEFileName(ulong addr, uint count, char* fileName, uint* pNeeded) => HResults.E_NOTIMPL;

public unsafe int GetPEFileName(ulong addr, uint count, char* fileName, uint* pNeeded)
{
try
{
Contracts.ILoader contract = _target.Contracts.Loader;
Contracts.ModuleHandle handle = contract.GetModuleHandle(addr);
string path = contract.GetPath(handle);

// Return not implemented for empty paths for non-reflection emit assemblies (for example, loaded from memory)
if (string.IsNullOrEmpty(path))
{
Contracts.ModuleFlags flags = contract.GetFlags(handle);
if (!flags.HasFlag(Contracts.ModuleFlags.ReflectionEmit))
{
return HResults.E_NOTIMPL;
}
}

CopyStringToTargetBuffer(fileName, count, pNeeded, path);
}
catch (System.Exception ex)
{
return ex.HResult;
}

return HResults.S_OK;
}

public unsafe int GetPrivateBinPaths(ulong appDomain, int count, char* paths, uint* pNeeded) => HResults.E_NOTIMPL;
public unsafe int GetRCWData(ulong addr, void* data) => HResults.E_NOTIMPL;
public unsafe int GetRCWInterfaces(ulong rcw, uint count, void* interfaces, uint* pNeeded) => HResults.E_NOTIMPL;
Expand Down