Skip to content

Commit 9f98e8e

Browse files
authored
Add support for symbolicating APK/ZIP-embedded libraries on Android (#662)
By default, modern Android build tools will store native libraries uncompressed, and the loader will map them directly from the APK (instead of the package manager extracting them on installation). This commit adds support for symbolicating these embedded libraries. To avoid parsing ZIP structures, the offset of the library within the archive is determined via /proc/self/maps. ref: https://android.googlesource.com/platform/bionic/+/main/android-changes-for-ndk-developers.md#opening-shared-libraries-directly-from-an-apk
1 parent 386a42a commit 9f98e8e

File tree

7 files changed

+149
-38
lines changed

7 files changed

+149
-38
lines changed

src/symbolize/gimli.rs

+38-9
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ impl<'data> Context<'data> {
186186
fn mmap(path: &Path) -> Option<Mmap> {
187187
let file = File::open(path).ok()?;
188188
let len = file.metadata().ok()?.len().try_into().ok()?;
189-
unsafe { Mmap::map(&file, len) }
189+
unsafe { Mmap::map(&file, len, 0) }
190190
}
191191

192192
cfg_if::cfg_if! {
@@ -268,6 +268,21 @@ struct Cache {
268268

269269
struct Library {
270270
name: OsString,
271+
#[cfg(target_os = "android")]
272+
/// On Android, the dynamic linker [can map libraries directly from a
273+
/// ZIP archive][ndk-linker-changes] (typically an `.apk`).
274+
///
275+
/// The linker requires that these libraries are stored uncompressed
276+
/// and page-aligned.
277+
///
278+
/// These "embedded" libraries have filepaths of the form
279+
/// `/path/to/my.apk!/lib/mylib.so` (where `/path/to/my.apk` is the archive
280+
/// and `lib/mylib.so` is the name of the library within the archive).
281+
///
282+
/// This mechanism is present on Android since API level 23.
283+
///
284+
/// [ndk-linker-changes]: https://android.googlesource.com/platform/bionic/+/main/android-changes-for-ndk-developers.md#opening-shared-libraries-directly-from-an-apk
285+
zip_offset: Option<u64>,
271286
#[cfg(target_os = "aix")]
272287
/// On AIX, the library mmapped can be a member of a big-archive file.
273288
/// For example, with a big-archive named libfoo.a containing libbar.so,
@@ -294,17 +309,31 @@ struct LibrarySegment {
294309
len: usize,
295310
}
296311

297-
#[cfg(target_os = "aix")]
298312
fn create_mapping(lib: &Library) -> Option<Mapping> {
299-
let name = &lib.name;
300-
let member_name = &lib.member_name;
301-
Mapping::new(name.as_ref(), member_name)
313+
cfg_if::cfg_if! {
314+
if #[cfg(target_os = "aix")] {
315+
Mapping::new(lib.name.as_ref(), &lib.member_name)
316+
} else if #[cfg(target_os = "android")] {
317+
Mapping::new_android(lib.name.as_ref(), lib.zip_offset)
318+
} else {
319+
Mapping::new(lib.name.as_ref())
320+
}
321+
}
302322
}
303323

304-
#[cfg(not(target_os = "aix"))]
305-
fn create_mapping(lib: &Library) -> Option<Mapping> {
306-
let name = &lib.name;
307-
Mapping::new(name.as_ref())
324+
/// Try to extract the archive path from an "embedded" library path
325+
/// (e.g. `/path/to/my.apk` from `/path/to/my.apk!/mylib.so`).
326+
///
327+
/// Returns `None` if the path does not contain a `!/` separator.
328+
#[cfg(target_os = "android")]
329+
fn extract_zip_path_android(path: &mystd::ffi::OsStr) -> Option<&mystd::ffi::OsStr> {
    use mystd::os::unix::ffi::OsStrExt;

    let bytes = path.as_bytes();
    // Byte index of the first `!/` separator; `None` (propagated by `?`) means
    // `path` is not an "embedded" library path.
    let separator = bytes.windows(2).position(|pair| pair == b"!/")?;
    // Everything before the separator is the path of the archive itself.
    Some(mystd::ffi::OsStr::from_bytes(&bytes[..separator]))
}
309338

310339
// unsafe because this is required to be externally synchronized

src/symbolize/gimli/elf.rs

+41
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,47 @@ impl Mapping {
4545
})
4646
}
4747

48+
/// On Android, shared objects can be loaded directly from a ZIP archive
49+
/// (see: [`super::Library::zip_offset`]).
50+
///
51+
/// If `zip_offset` is not None, we interpret the `path` as an
52+
/// "embedded" library path, and the value of `zip_offset` tells us where
53+
/// in the ZIP archive the library data starts.
54+
///
55+
/// We expect `zip_offset` to be page-aligned because the dynamic linker
56+
/// requires this. Otherwise, loading the embedded library will fail.
57+
///
58+
/// If we fail to load an embedded library for any reason, we fall back to
59+
/// interpreting the path as a literal file on disk (same as calling [`Self::new`]).
60+
#[cfg(target_os = "android")]
61+
pub fn new_android(path: &Path, zip_offset: Option<u64>) -> Option<Mapping> {
62+
fn map_embedded_library(path: &Path, zip_offset: u64) -> Option<Mapping> {
63+
// get path of ZIP archive (delimited by `!/`)
64+
let zip_path = Path::new(super::extract_zip_path_android(path.as_os_str())?);
65+
66+
let file = fs::File::open(zip_path).ok()?;
67+
let len = file.metadata().ok()?.len();
68+
69+
// NOTE: we map the remainder of the entire archive instead of just the library so we don't have to determine its length
70+
// NOTE: mmap will fail if `zip_offset` is not page-aligned
71+
let map = unsafe {
72+
super::mmap::Mmap::map(&file, usize::try_from(len - zip_offset).ok()?, zip_offset)
73+
}?;
74+
75+
Mapping::mk(map, |map, stash| {
76+
Context::new(stash, Object::parse(&map)?, None, None)
77+
})
78+
}
79+
80+
// if ZIP offset is given, try mapping as a ZIP-embedded library
81+
// otherwise, fallback to mapping as a literal filepath
82+
if let Some(zip_offset) = zip_offset {
83+
map_embedded_library(path, zip_offset).or_else(|| Self::new(path))
84+
} else {
85+
Self::new(path)
86+
}
87+
}
88+
4889
/// Load debuginfo from an external debug file.
4990
fn new_debug(original_path: &Path, path: PathBuf, crc: Option<u32>) -> Option<Mapping> {
5091
let map = super::mmap(&path)?;

src/symbolize/gimli/libs_dl_iterate_phdr.rs

+47-21
Original file line numberDiff line numberDiff line change
@@ -6,51 +6,62 @@ use super::mystd::borrow::ToOwned;
66
use super::mystd::env;
77
use super::mystd::ffi::{CStr, OsStr};
88
use super::mystd::os::unix::prelude::*;
9-
use super::{Library, LibrarySegment, OsString, Vec};
9+
use super::{parse_running_mmaps, Library, LibrarySegment, OsString, Vec};
1010
use core::slice;
1111

12+
struct CallbackData {
13+
libs: Vec<Library>,
14+
maps: Option<Vec<parse_running_mmaps::MapsEntry>>,
15+
}
1216
pub(super) fn native_libraries() -> Vec<Library> {
13-
let mut ret = Vec::new();
17+
let mut cb_data = CallbackData {
18+
libs: Vec::new(),
19+
#[cfg(not(target_os = "hurd"))]
20+
maps: parse_running_mmaps::parse_maps().ok(),
21+
#[cfg(target_os = "hurd")]
22+
maps: None,
23+
};
1424
unsafe {
15-
libc::dl_iterate_phdr(Some(callback), core::ptr::addr_of_mut!(ret).cast());
25+
libc::dl_iterate_phdr(Some(callback), core::ptr::addr_of_mut!(cb_data).cast());
1626
}
17-
ret
27+
cb_data.libs
1828
}
1929

20-
fn infer_current_exe(base_addr: usize) -> OsString {
21-
cfg_if::cfg_if! {
22-
if #[cfg(not(target_os = "hurd"))] {
23-
if let Ok(entries) = super::parse_running_mmaps::parse_maps() {
24-
let opt_path = entries
25-
.iter()
26-
.find(|e| e.ip_matches(base_addr) && e.pathname().len() > 0)
27-
.map(|e| e.pathname())
28-
.cloned();
29-
if let Some(path) = opt_path {
30-
return path;
31-
}
32-
}
30+
fn infer_current_exe(
31+
maps: &Option<Vec<parse_running_mmaps::MapsEntry>>,
32+
base_addr: usize,
33+
) -> OsString {
34+
#[cfg(not(target_os = "hurd"))]
35+
if let Some(entries) = maps {
36+
let opt_path = entries
37+
.iter()
38+
.find(|e| e.ip_matches(base_addr) && e.pathname().len() > 0)
39+
.map(|e| e.pathname())
40+
.cloned();
41+
if let Some(path) = opt_path {
42+
return path;
3343
}
3444
}
45+
3546
env::current_exe().map(|e| e.into()).unwrap_or_default()
3647
}
3748

3849
/// # Safety
3950
/// `info` must be a valid pointer.
40-
/// `vec` must be a valid pointer to `Vec<Library>`
51+
/// `data` must be a valid pointer to `CallbackData`.
4152
#[forbid(unsafe_op_in_unsafe_fn)]
4253
unsafe extern "C" fn callback(
4354
info: *mut libc::dl_phdr_info,
4455
_size: libc::size_t,
45-
vec: *mut libc::c_void,
56+
data: *mut libc::c_void,
4657
) -> libc::c_int {
4758
// SAFETY: We are guaranteed these fields:
4859
let dlpi_addr = unsafe { (*info).dlpi_addr };
4960
let dlpi_name = unsafe { (*info).dlpi_name };
5061
let dlpi_phdr = unsafe { (*info).dlpi_phdr };
5162
let dlpi_phnum = unsafe { (*info).dlpi_phnum };
5263
// SAFETY: We assured this.
53-
let libs = unsafe { &mut *vec.cast::<Vec<Library>>() };
64+
let CallbackData { libs, maps } = unsafe { &mut *data.cast::<CallbackData>() };
5465
// most implementations give us the main program first
5566
let is_main = libs.is_empty();
5667
// we may be statically linked, which means we are main and mostly one big blob of code
@@ -63,7 +74,7 @@ unsafe extern "C" fn callback(
6374
// don't try to look up our name from /proc/self/maps, it'll get silly
6475
env::current_exe().unwrap_or_default().into_os_string()
6576
} else if is_main && no_given_name {
66-
infer_current_exe(dlpi_addr as usize)
77+
infer_current_exe(&maps, dlpi_addr as usize)
6778
} else {
6879
// this fallback works even if we are main, because some platforms give the name anyways
6980
if dlpi_name.is_null() {
@@ -73,6 +84,19 @@ unsafe extern "C" fn callback(
7384
OsStr::from_bytes(unsafe { CStr::from_ptr(dlpi_name) }.to_bytes()).to_owned()
7485
}
7586
};
87+
#[cfg(target_os = "android")]
88+
let zip_offset: Option<u64> = {
89+
// only check for ZIP-embedded file if we have data from /proc/self/maps
90+
maps.as_ref().and_then(|maps| {
91+
// check if file is embedded within a ZIP archive by searching for `!/`
92+
super::extract_zip_path_android(&name).and_then(|_| {
93+
// find MapsEntry matching library's base address and get its file offset
94+
maps.iter()
95+
.find(|m| m.ip_matches(dlpi_addr as usize))
96+
.map(|m| m.offset())
97+
})
98+
})
99+
};
76100
let headers = if dlpi_phdr.is_null() || dlpi_phnum == 0 {
77101
&[]
78102
} else {
@@ -81,6 +105,8 @@ unsafe extern "C" fn callback(
81105
};
82106
libs.push(Library {
83107
name,
108+
#[cfg(target_os = "android")]
109+
zip_offset,
84110
segments: headers
85111
.iter()
86112
.map(|header| LibrarySegment {

src/symbolize/gimli/mmap_fake.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
use super::{mystd::io::Read, File};
1+
use super::mystd::io::{Read, Seek, SeekFrom};
2+
use super::File;
23
use alloc::vec::Vec;
34
use core::ops::Deref;
45

@@ -7,10 +8,11 @@ pub struct Mmap {
78
}
89

910
impl Mmap {
10-
pub unsafe fn map(mut file: &File, len: usize) -> Option<Mmap> {
11+
pub unsafe fn map(mut file: &File, len: usize, offset: u64) -> Option<Mmap> {
1112
let mut mmap = Mmap {
1213
vec: Vec::with_capacity(len),
1314
};
15+
file.seek(SeekFrom::Start(offset));
1416
file.read_to_end(&mut mmap.vec).ok()?;
1517
Some(mmap)
1618
}

src/symbolize/gimli/mmap_unix.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ pub struct Mmap {
1515
}
1616

1717
impl Mmap {
18-
pub unsafe fn map(file: &File, len: usize) -> Option<Mmap> {
18+
pub unsafe fn map(file: &File, len: usize, offset: u64) -> Option<Mmap> {
1919
let ptr = mmap64(
2020
ptr::null_mut(),
2121
len,
2222
libc::PROT_READ,
2323
libc::MAP_PRIVATE,
2424
file.as_raw_fd(),
25-
0,
25+
offset.try_into().ok()?,
2626
);
2727
if ptr == libc::MAP_FAILED {
2828
return None;

src/symbolize/gimli/mmap_windows.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub struct Mmap {
1616
}
1717

1818
impl Mmap {
19-
pub unsafe fn map(file: &File, len: usize) -> Option<Mmap> {
19+
pub unsafe fn map(file: &File, len: usize, offset: u64) -> Option<Mmap> {
2020
let file = file.try_clone().ok()?;
2121
let mapping = CreateFileMappingA(
2222
file.as_raw_handle(),
@@ -29,7 +29,13 @@ impl Mmap {
2929
if mapping.is_null() {
3030
return None;
3131
}
32-
let ptr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, len);
32+
let ptr = MapViewOfFile(
33+
mapping,
34+
FILE_MAP_READ,
35+
(offset >> 32) as u32,
36+
offset as u32,
37+
len,
38+
);
3339
CloseHandle(mapping);
3440
if ptr.Value.is_null() {
3541
return None;

src/symbolize/gimli/parse_running_mmaps_unix.rs

+9-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ pub(super) struct MapsEntry {
2020
/// p = private (copy on write)
2121
perms: [char; 4],
2222
/// Offset into the file (or "whatever").
23-
offset: usize,
23+
offset: u64,
2424
/// device (major, minor)
2525
dev: (usize, usize),
2626
/// inode on the device. 0 indicates that no inode is associated with the memory region (e.g. uninitialized data aka BSS).
@@ -76,6 +76,11 @@ impl MapsEntry {
7676
pub(super) fn ip_matches(&self, ip: usize) -> bool {
7777
self.address.0 <= ip && ip < self.address.1
7878
}
79+
80+
#[cfg(target_os = "android")]
81+
pub(super) fn offset(&self) -> u64 {
82+
self.offset
83+
}
7984
}
8085

8186
impl FromStr for MapsEntry {
@@ -118,6 +123,8 @@ impl FromStr for MapsEntry {
118123
let pathname_str = s.trim_start();
119124

120125
let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number");
126+
let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number");
127+
121128
let address = if let Some((start, limit)) = range_str.split_once('-') {
122129
(hex(start)?, hex(limit)?)
123130
} else {
@@ -132,7 +139,7 @@ impl FromStr for MapsEntry {
132139
}
133140
perms
134141
};
135-
let offset = hex(offset_str)?;
142+
let offset = hex64(offset_str)?;
136143
let dev = if let Some((major, minor)) = dev_str.split_once(':') {
137144
(hex(major)?, hex(minor)?)
138145
} else {

0 commit comments

Comments
 (0)