-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[llvm-objcopy][ELF] Add an option to remove notes #118739
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This adds an option `--remove-note` to selectively delete notes in an ELF file. For now, it is expected to be useful for editing core dump files; in particular, it searches for the notes in `PT_NOTE` segments and does not handle nested segments. The implementation can be extended later as needed.
@llvm/pr-subscribers-llvm-binary-utilities Author: Igor Kudrin (igorkudrin) ChangesThis adds an option Full diff: https://github.com/llvm/llvm-project/pull/118739.diff 7 Files Affected:
diff --git a/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
index 59960b65307430..01a8762cfb9c37 100644
--- a/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
+++ b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
@@ -15,6 +15,12 @@
namespace llvm {
namespace objcopy {
+// Note to remove info specified by --remove-note option.
+struct RemoveNoteInfo {
+ StringRef Name;
+ uint32_t TypeId;
+};
+
// ELF specific configuration for copying/stripping a single file.
struct ELFConfig {
uint8_t NewSymbolVisibility = (uint8_t)ELF::STV_DEFAULT;
@@ -31,6 +37,9 @@ struct ELFConfig {
bool KeepFileSymbols = false;
bool LocalizeHidden = false;
bool VerifyNoteSections = true;
+
+ // Notes specified by --remove-note option.
+ SmallVector<RemoveNoteInfo, 0> NotesToRemove;
};
} // namespace objcopy
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index 4793651f1d4e0b..1df64b0b7ce886 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -609,6 +609,97 @@ static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo,
Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
}
+template <class ELFT>
+static Error removeNoteImpl(Object &Obj,
+ ArrayRef<RemoveNoteInfo> NotesToRemove) {
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT);
+ for (Segment &Seg : Obj.segments()) {
+ // TODO: Support nested segments
+ if (Seg.Type != PT_NOTE || Seg.ParentSegment)
+ continue;
+
+ // Find chunks of the segment data to remove
+ struct DeletedRange {
+ uint64_t OldFrom;
+ uint64_t OldTo;
+ uint64_t NewOffset;
+ };
+ std::vector<DeletedRange> DataToRemove;
+ ArrayRef<uint8_t> OldData = Seg.getContents();
+ size_t Align = std::max<size_t>(4, Seg.Align);
+ uint64_t Offset = 0;
+ while (Offset + sizeof(Elf_Nhdr) <= OldData.size()) {
+ auto Nhdr = reinterpret_cast<const Elf_Nhdr *>(OldData.data() + Offset);
+ size_t FullSize = Nhdr->getSize(Align);
+ if (Offset + FullSize > OldData.size())
+ break;
+ Elf_Note Note(*Nhdr);
+ if (llvm::any_of(NotesToRemove, [&](const RemoveNoteInfo &NoteInfo) {
+ return NoteInfo.TypeId == Note.getType() &&
+ (NoteInfo.Name.empty() || NoteInfo.Name == Note.getName());
+ }))
+ DataToRemove.push_back({Offset, Offset + FullSize, 0});
+ Offset += FullSize;
+ }
+ if (DataToRemove.empty())
+ continue;
+
+ // Prepare the new segment data
+ std::vector<uint8_t> NewData;
+ NewData.reserve(OldData.size());
+ Offset = 0;
+ for (auto &RemRange : DataToRemove) {
+ if (Offset < RemRange.OldFrom) {
+ auto Slice = OldData.slice(Offset, RemRange.OldFrom - Offset);
+ NewData.insert(NewData.end(), Slice.begin(), Slice.end());
+ }
+ RemRange.NewOffset = NewData.size();
+ Offset = RemRange.OldTo;
+ }
+ if (Offset < OldData.size()) {
+ auto Slice = OldData.slice(Offset);
+ NewData.insert(NewData.end(), Slice.begin(), Slice.end());
+ }
+
+ auto CalculateNewOffset = [&](uint64_t SecOffset) {
+ uint64_t Offset = SecOffset - Seg.Offset;
+ auto It =
+ llvm::upper_bound(DataToRemove, Offset,
+ [](const uint64_t &Off, const DeletedRange &Range) {
+ return Off < Range.OldFrom;
+ });
+ if (It != DataToRemove.begin()) {
+ --It;
+ Offset = (Offset > It->OldTo) ? (Offset - It->OldTo + It->NewOffset)
+ : It->NewOffset;
+ }
+ return Offset + Seg.Offset;
+ };
+
+ // Remap the segment's sections
+ DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>> Mapping;
+ for (const SectionBase *Sec : Seg.Sections) {
+ uint64_t NewOffset = CalculateNewOffset(Sec->Offset);
+ uint64_t NewSize =
+ CalculateNewOffset(Sec->Offset + Sec->Size) - NewOffset;
+ Mapping.try_emplace(Sec, NewOffset, NewSize);
+ }
+
+ Obj.updateSegmentData(Seg, std::move(NewData), Mapping);
+ }
+ return Error::success();
+}
+
+static Error removeNote(Object &Obj, endianness Endianness,
+ ArrayRef<RemoveNoteInfo> NotesToRemove) {
+ // Note: notes for both 32-bit and 64-bit ELF files use 4-byte words in the
+ // header, so the parsers are the same.
+ if (Endianness == endianness::little)
+ return removeNoteImpl<ELF64LE>(Obj, NotesToRemove);
+ else
+ return removeNoteImpl<ELF64BE>(Obj, NotesToRemove);
+}
+
static Error
handleUserSection(const NewSectionInfo &NewSection,
function_ref<Error(StringRef, ArrayRef<uint8_t>)> F) {
@@ -799,6 +890,11 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
? endianness::little
: endianness::big;
+ if (!ELFConfig.NotesToRemove.empty()) {
+ if (Error Err = removeNote(Obj, E, ELFConfig.NotesToRemove))
+ return Err;
+ }
+
for (const NewSectionInfo &AddedSection : Config.AddSection) {
auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) -> Error {
OwnedDataSection &NewSection =
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 01c2f24629077a..9f460f685706c8 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -2308,6 +2308,27 @@ Error Object::addNewSymbolTable() {
return Error::success();
}
+void Object::updateSegmentData(
+ Segment &S, std::vector<uint8_t> NewSegmentData,
+ const DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>>
+ &SectionMapping) {
+ auto It =
+ UpdatedSegments.insert_or_assign(&S, std::move(NewSegmentData)).first;
+ S.Contents = It->second;
+ S.FileSize = S.Contents.size();
+ if (S.MemSize)
+ S.MemSize = S.FileSize;
+ assert(SectionMapping.size() == S.Sections.size());
+ for (const auto &SM : SectionMapping) {
+ assert(SM.first->ParentSegment == &S && S.Sections.count(SM.first));
+ assert(SM.second.first >= S.Offset);
+ assert((SM.second.first + SM.second.second) <= (S.Offset + S.FileSize));
+ SectionBase *MutSec = const_cast<SectionBase *>(SM.first);
+ MutSec->Offset = SM.second.first;
+ MutSec->Size = SM.second.second;
+ }
+}
+
// Orders segments such that if x = y->ParentSegment then y comes before x.
static void orderSegments(std::vector<Segment *> &Segments) {
llvm::stable_sort(Segments, compareSegmentsByOffset);
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 6ccf85387131e4..5e16d4c0c1885a 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -1159,6 +1159,7 @@ class Object {
std::vector<SegPtr> Segments;
std::vector<SecPtr> RemovedSections;
DenseMap<SectionBase *, std::vector<uint8_t>> UpdatedSections;
+ DenseMap<Segment *, std::vector<uint8_t>> UpdatedSegments;
static bool sectionIsAlloc(const SectionBase &Sec) {
return Sec.Flags & ELF::SHF_ALLOC;
@@ -1234,6 +1235,10 @@ class Object {
Segments.emplace_back(std::make_unique<Segment>(Data));
return *Segments.back();
}
+ void updateSegmentData(
+ Segment &S, std::vector<uint8_t> NewSegmentData,
+ const DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>>
+ &SectionMapping);
bool isRelocatable() const {
return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable;
}
diff --git a/llvm/test/tools/llvm-objcopy/ELF/remove-note.test b/llvm/test/tools/llvm-objcopy/ELF/remove-note.test
new file mode 100644
index 00000000000000..b24538e26d019e
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/remove-note.test
@@ -0,0 +1,98 @@
+# RUN: not llvm-objcopy --remove-note= - 2>&1 | FileCheck %s --check-prefix=ERR-NOTYPEID
+# RUN: not llvm-objcopy --remove-note=/1 - 2>&1 | FileCheck %s --check-prefix=ERR-EMPTYNAME
+# RUN: not llvm-objcopy --remove-note=CORE/1/2 - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM1
+# RUN: not llvm-objcopy --remove-note=Notanumber - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM2
+# RUN: not llvm-objcopy --remove-note=CORE/Notanumber - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM2
+
+# ERR-NOTYPEID: error: bad format for --remove-note, missing type_id
+# ERR-EMPTYNAME: error: bad format for --remove-note, note name is empty
+# ERR-INVNUM1: error: bad note type_id for --remove-note: '1/2'
+# ERR-INVNUM2: error: bad note type_id for --remove-note: 'Notanumber'
+
+# RUN: yaml2obj -D ALIGN=8 %s -o - \
+# RUN: | llvm-objcopy --remove-note=0x01 --remove-note=DUMMY/0x02 --remove-note=CORE/0x03 - - \
+# RUN: | llvm-readobj --segments --sections --notes - \
+# RUN: | FileCheck %s -D#SIZE=64
+
+# RUN: yaml2obj -D ALIGN=4 %s -o - \
+# RUN: | llvm-objcopy --remove-note=0x01 --remove-note=DUMMY/0x02 --remove-note=CORE/0x03 - - \
+# RUN: | llvm-readobj --segments --sections --notes - \
+# RUN: | FileCheck %s -D#SIZE=48
+
+# CHECK: Sections [
+# CHECK: Section {
+# CHECK: Name: .note
+# CHECK-NEXT: Type: SHT_NOTE
+# CHECK-NEXT: Flags [
+# CHECK-NEXT: ]
+# CHECK-NEXT: Address:
+# CHECK-NEXT: Offset: [[OFFSET:0x.+]]
+# CHECK-NEXT: Size: [[#%d,SIZE]]
+
+# CHECK: ProgramHeaders [
+# CHECK-NEXT: ProgramHeader {
+# CHECK-NEXT: Type: PT_NOTE
+# CHECK-NEXT: Offset: [[OFFSET]]
+# CHECK-NEXT: VirtualAddress: 0x0
+# CHECK-NEXT: PhysicalAddress: 0x0
+# CHECK-NEXT: FileSize: [[#%d,SIZE]]
+# CHECK-NEXT: MemSize: 0
+
+# CHECK: NoteSections [
+# CHECK-NEXT: NoteSection {
+# CHECK-NEXT: Name:
+# CHECK-NEXT: Offset: [[OFFSET]]
+# CHECK-NEXT: Size: 0x[[#%x,SIZE]]
+# CHECK-NEXT: Notes [
+# CHECK-NEXT: {
+# CHECK-NEXT: Owner: CORE
+# CHECK-NEXT: Data size: 0x2
+# CHECK-NEXT: Type: NT_FPREGSET
+# CHECK-NEXT: Description data (
+# CHECK-NEXT: 0000: 0202
+# CHECK-NEXT: )
+# CHECK-NEXT: }
+# CHECK-NEXT: {
+# CHECK-NEXT: Owner: CORE
+# CHECK-NEXT: Data size: 0x2
+# CHECK-NEXT: Type: NT_TASKSTRUCT
+# CHECK-NEXT: Description data (
+# CHECK-NEXT: 0000: 0404
+# CHECK-NEXT: )
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_CORE
+ Machine: EM_X86_64
+ProgramHeaders:
+ - Type: PT_NOTE
+ MemSize: 0
+ FirstSec: .note
+ LastSec: .note
+Sections:
+ - Name: .note
+ Type: SHT_NOTE
+ AddressAlign: [[ALIGN]]
+ Notes:
+ - Name: CORE
+ Type: 0x01
+ Desc: 0101
+ - Name: CORE
+ Type: 0x02
+ Desc: 0202
+ - Name: CORE
+ Type: 0x03
+ Desc: 0303
+ - Name: CORE
+ Type: 0x04
+ Desc: 0404
+ - Name: LINUX
+ Type: 0x01
+ Desc: 0505
+...
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
index 104d802b1e1eeb..5e348d65adca18 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
@@ -527,6 +527,37 @@ static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
return SI;
}
+static Expected<RemoveNoteInfo> parseRemoveNoteInfo(StringRef FlagValue) {
+ // Parse value given with --remove-note option. The format is:
+ //
+ // [name/]type_id
+ //
+ // where:
+ // <name> - optional note name. If not given, all notes with the specified
+ // <type_id> are removed.
+ // <type_id> - note type value, can be decimal or hexadecimal number prefixed
+ // with 0x.
+ RemoveNoteInfo NI;
+ if (FlagValue.empty())
+ return createStringError(errc::invalid_argument,
+ "bad format for --remove-note, missing type_id");
+ SmallVector<StringRef, 2> Tokens;
+ FlagValue.split(Tokens, '/', /*MaxSplit=*/1);
+ assert(!Tokens.empty() && Tokens.size() <= 2);
+ if (Tokens.size() == 2) {
+ if (Tokens[0].empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --remove-note, note name is empty");
+ NI.Name = Tokens[0];
+ }
+ if (Tokens.back().getAsInteger(0, NI.TypeId))
+ return createStringError(errc::invalid_argument,
+ "bad note type_id for --remove-note: '%s'",
+ Tokens.back().str().c_str());
+ return NI;
+}
+
// Parse input option \p ArgValue and load section data. This function
// extracts section name and name of the file keeping section data from
// ArgValue, loads data from the file, and stores section name and data
@@ -1210,6 +1241,14 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
};
}
+ for (auto *Arg : InputArgs.filtered(OBJCOPY_remove_note)) {
+ Expected<RemoveNoteInfo> NoteInfo = parseRemoveNoteInfo(Arg->getValue());
+ if (!NoteInfo)
+ return NoteInfo.takeError();
+
+ ELFConfig.NotesToRemove.push_back(*NoteInfo);
+ }
+
if (Config.DecompressDebugSections &&
Config.CompressionType != DebugCompressionType::None) {
return createStringError(
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 434b5ff92324eb..fbc6a59d9461e7 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -297,3 +297,7 @@ defm pad_to
"of zero or the value specified by the --gap-fill option. "
"This option is only supported for ELF input and binary output">,
MetaVarName<"address">;
+
+defm remove_note
+ : Eq<"remove-note", "Remove note(s) with <type_id> and optional <name>">,
+ MetaVarName<"[name/]type_id">;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please update the Command Guide documentation under llvm/docs with the new option.
Just some initial thoughts as I don't have time to go through this in much depth yet.
I'm concerned that this approach seems to only work with notes in segments, when the option seems to apply reasonably to ET_REL objects too, where the notes are in sections only. Indeed, more generally llvm-objcopy is designed to work primarily with sections, with direct interactions with segments being very limited. I think it would more naturally fit with llvm-objcopy's design to work with note sections first. You could swap the section contents is the same way as other replace section operations work, so that much of the existing writing behaviour works unchanged.
Clearly, something special would need to be done for PT_NOTE segments, especially those without sections. One option might be to create an artificial section that covers the whole segment (or any parts of the segment not covered by an existing section at least) so that the section-based approach outlined above would still work, then have special handling to adjust the file/memsz fields after the sections have been updated.
Somewhat related to the previous point, I'm concerned with how updateSegmentData
might interact with --remove-section
for sections in segments. The current approach of the latter is that removed section data is overwritten with null bytes, preserving the segment size. I understand that for a PT_NOTE this may not be quite so desirable, but I think we need to do something to prevent mixing the two behaviours, because what the end result should be is non-obvious. One way would be to emit an error if a note section/segment is touched by both --remove-section and --remove-note.
Done
Thanks! I'll try to rework the patch using the sections-first approach.
For the initial implementation, I plan to simply refuse to remove notes that reside in PT_LOAD segments. It seems that it is possible to actually remove data in non-load segments and update their contents and size accordingly. The sections in such segments and sections without segments (i.e. in ET_REL files) can also be updated in this way. Later, we can update the |
For core files, llvm-readobj dumps note segments as a whole, without reporting individual sections.
We can't rely on the existing code to update However, the existing code is suitable for sections that are not associated with segments. I've updated the patch, so sections in relocatable objects are also supported now.
This would either expose these temporary sections in the output file, or require some non-trivial changes to the code. And yet this does not solve the issue of gaps in
I suspect that there may also be some problems with using the new command together with |
Yeah, you'd need to compress the sections together and then shrink the segment. I'm not sure if there's existing code to modify a segment in this way yet.
I'm happy with segment support being added later.
I was thinking something similar to how the ELF Header and Program Header table work, but those are pseudo-segments, not pseudo-sections. Nonetheless, I don't think it would be too hard to add a flag to the artificial section to indicate it shouldn't be included in the section header table. Again though, I'm happy for that to be deferred to a later patch, if it makes things simpler in the first one.
IIRC, we loosely model --update-section as --remove-section followed by --add-section. --add-section is deliberately done after --remove-section, to allow for a section to be replaced (and what would be the point of removing a section that was just added after all). I guess --remove-note would be similar to --update-section in that it's essentially another way of changing the contents of a note section. As such, it would come after --remove-section operations, but it's more ambiguous as to what order it applies to with regards to the other. I don't think --add-section + --remove-note interact, so I guess it doesn't matter there. --update-section + --remove-note is mostly nonsensical, where the same section is touched. One possible exception to the above is adding/updating a note section, but wanting to remove a note from the added/modified section, because the input data wasn't quite in the right format. I don't see a use-case for --remove-note coming before the operations, but this would indicate it should come after, if it could be made to work. I also don't think the use-case is that strong, so an error is equally fine by me. I'm not going to look at the details today - it'll have to wait until the new year when I'm back in the (home-)office. |
And that exactly what my code for updating segments did. There was now existing code for anything similar. Current code expects sections to stay in the same places within segments.
OK, I've removed the support for segments for now to simplify the patch. I'll create a separate review for that after this patch is landed.
If we add the flag to mark a section as artificial, we will have to check it in many places, such as
One can argue that the intention might be to remove some unwanted notes and add new ones using these commands... or to make sure that the requested notes do not appear in the produced output, even if they are in the section added by these commands. So, to avoid speculation and overthinking, it is easier to reject such combinations.
Thanks! |
Ping |
On my TODO list. I plan on looking at this tomorrow, if I can. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code mostly looks fine. I need to go over the tests again, but have run out of time for today.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the review! Please have a look at the updates.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
This reverts commit 9324e6a.
This fixes "unused-local-typedef" warnings in 9324e6a. This adds an option `--remove-note=[name/]type` to selectively delete notes in ELF files, where `type` is the numeric value of the note type and `name` is the name of the originator. The name can be omitted, in which case all notes of the specified type will be removed. For now, only `SHT_NOTE` sections that are not associated with segments are handled. The implementation can be extended later as needed. RFC: https://discourse.llvm.org/t/rfc-llvm-objcopy-feature-for-editing-notes/83491
This adds an option
--remove-note
to selectively delete notes in an ELF file. For now, it is expected to be useful for editing core dump files; in particular, it searches for the notes inPT_NOTE
segments and does not handle nested segments. The implementation can be extended later as needed.RFC: https://discourse.llvm.org/t/rfc-llvm-objcopy-feature-for-editing-notes/83491