Skip to content

Raise interrupt for used descriptors on tx queue and use edge-triggered epoll. #1444

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
- The backtrace is printed on `panic`, no longer causing a seccomp fault.
- Fixed #1375 - Change logger options type from Value to Vec<LogOption> to
prevent potential unwrap on None panics.
- Raise interrupt for TX queue used descriptors - GitHub issue #1436
- Fixed a bug that caused 100% cpu load when the net device rx is throttled
by the ratelimiter - GitHub issue #1439

## [0.19.0]

Expand Down
109 changes: 57 additions & 52 deletions src/devices/src/virtio/net.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,6 @@ pub struct NetEpollHandler {
acked_features: u64,
mmds_ns: Option<MmdsNetworkStack>,
guest_mac: Option<MacAddr>,
epoll_fd: RawFd,
rx_tap_listening: bool,
rx_tap_epoll_token: u64,

#[cfg(test)]
test_mutators: tests::TestMutators,
Expand Down Expand Up @@ -397,6 +394,7 @@ impl NetEpollHandler {
// trigger a process_rx() which checks if there are any new frames to be sent, starting
// with the MMDS network stack.
let mut process_rx_for_mmds = false;
let mut raise_irq = false;

while let Some(head) = self.tx.queue.pop(&self.mem) {
// If limiter.consume() fails it means there is no more TokenType::Ops
Expand Down Expand Up @@ -474,6 +472,11 @@ impl NetEpollHandler {
}

self.tx.queue.add_used(&self.mem, head_index, 0);
raise_irq = true;
}

if raise_irq {
self.signal_used_queue()?;
}

// An incoming frame for the MMDS may trigger the transmission of a new message.
Expand All @@ -500,28 +503,6 @@ impl NetEpollHandler {
// Reads a single frame from the tap device into the RX frame buffer,
// returning the number of bytes read (or the underlying I/O error).
fn read_tap(&mut self) -> io::Result<usize> {
self.tap.read(&mut self.rx.frame_buf)
}

// Adds the tap fd to the epoll set with a level-triggered EPOLLIN
// interest, and records that we are now listening for tap RX events.
// NOTE(review): removed by this PR in favor of registering the tap fd
// once, edge-triggered, at device activation time.
fn register_tap_rx_listener(&mut self) -> std::result::Result<(), std::io::Error> {
epoll::ctl(
self.epoll_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
self.tap.as_raw_fd(),
epoll::Event::new(epoll::Events::EPOLLIN, self.rx_tap_epoll_token),
)?;
self.rx_tap_listening = true;
Ok(())
}

// Removes the tap fd from the epoll set and records that we are no
// longer listening for tap RX events. (The Event argument to
// EPOLL_CTL_DEL is ignored by the kernel on modern Linux; it is passed
// here only to satisfy the epoll crate's API.)
// NOTE(review): removed by this PR; with edge-triggered epoll the fd
// stays registered and simply stops firing until new data arrives.
fn unregister_tap_rx_listener(&mut self) -> std::result::Result<(), std::io::Error> {
epoll::ctl(
self.epoll_fd,
epoll::ControlOptions::EPOLL_CTL_DEL,
self.tap.as_raw_fd(),
epoll::Event::new(epoll::Events::EPOLLIN, self.rx_tap_epoll_token),
)?;
self.rx_tap_listening = false;
Ok(())
}
}

impl EpollHandler for NetEpollHandler {
Expand All @@ -541,10 +522,6 @@ impl EpollHandler for NetEpollHandler {
underlying: e,
})
} else {
if !self.rx_tap_listening {
self.register_tap_rx_listener()
.map_err(DeviceError::IoError)?;
}
// If the limiter is not blocked, resume the receiving of bytes.
if !self.rx.rate_limiter.is_blocked() {
// There should be a buffer available now to receive the frame into.
Expand All @@ -558,8 +535,6 @@ impl EpollHandler for NetEpollHandler {
METRICS.net.rx_tap_event_count.inc();

if self.rx.queue.is_empty(&self.mem) {
self.unregister_tap_rx_listener()
.map_err(DeviceError::IoError)?;
return Err(DeviceError::NoAvailBuffers);
}

Expand Down Expand Up @@ -821,6 +796,8 @@ impl VirtioDevice for Net {
} else {
None
};
let tap_fd = tap.as_raw_fd();

let handler = NetEpollHandler {
rx: RxVirtio::new(
rx_queue,
Expand All @@ -839,9 +816,6 @@ impl VirtioDevice for Net {
acked_features: self.acked_features,
mmds_ns,
guest_mac: self.guest_mac(),
epoll_fd: self.epoll_config.epoll_raw_fd,
rx_tap_listening: false,
rx_tap_epoll_token: self.epoll_config.rx_tap_token,

#[cfg(test)]
test_mutators: tests::TestMutators::default(),
Expand All @@ -861,6 +835,20 @@ impl VirtioDevice for Net {

//TODO: barrier needed here maybe?

epoll::ctl(
self.epoll_config.epoll_raw_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
tap_fd,
epoll::Event::new(
epoll::Events::EPOLLIN | epoll::Events::EPOLLET,
self.epoll_config.rx_tap_token,
),
)
.map_err(|e| {
METRICS.net.activate_fails.inc();
ActivateError::EpollCtl(e)
})?;

epoll::ctl(
self.epoll_config.epoll_raw_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
Expand Down Expand Up @@ -1124,7 +1112,6 @@ mod tests {
let interrupt_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
let rx_queue_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
let tx_queue_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
let epoll_fd = epoll::create(true).unwrap();

(
NetEpollHandler {
Expand All @@ -1138,9 +1125,6 @@ mod tests {
mmds_ns: Some(MmdsNetworkStack::new_with_defaults()),
test_mutators,
guest_mac: None,
epoll_fd,
rx_tap_epoll_token: 0,
rx_tap_listening: false,
},
txq,
rxq,
Expand Down Expand Up @@ -1476,15 +1460,12 @@ mod tests {
};
let mem = GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap();
let (mut h, _txq, rxq) = default_test_netepollhandler(&mem, test_mutators);
h.register_tap_rx_listener().unwrap();

// The RX queue is empty.
match h.handle_event(RX_TAP_EVENT, epoll::Events::EPOLLIN) {
Err(DeviceError::NoAvailBuffers) => (),
_ => panic!("invalid"),
}
// Since the RX was empty, we shouldn't be listening for tap RX events.
assert!(!h.rx_tap_listening);

// Fake an avail buffer; this time, tap reading should error out.
rxq.avail.idx.set(1);
Expand Down Expand Up @@ -1599,7 +1580,7 @@ mod tests {
h.interrupt_evt.write(1).unwrap();
h.handle_event(RX_TAP_EVENT, EPOLLIN).unwrap();
assert!(h.rx.deferred_frame);
assert_eq!(h.interrupt_evt.read().unwrap(), 2);
assert_eq!(h.interrupt_evt.read().unwrap(), 3);
// The #cfg(test) enabled version of read_tap always returns 1234 bytes (or the len of
// the buffer, whichever is smaller).
assert_eq!(rxq.used.ring[0].get().len, 1234);
Expand Down Expand Up @@ -1705,8 +1686,8 @@ mod tests {
}

// wait for 100ms to give the rate-limiter timer a chance to replenish
// wait for an extra 50ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(150));
// wait for an extra 100ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(200));

// following TX procedure should succeed because bandwidth should now be available
{
Expand Down Expand Up @@ -1750,14 +1731,14 @@ mod tests {
assert!(h.get_rx_rate_limiter().is_blocked());
assert!(h.rx.deferred_frame);
// assert that no operation actually completed (limiter blocked it)
assert_eq!(h.interrupt_evt.read().unwrap(), 1);
assert_eq!(h.interrupt_evt.read().unwrap(), 2);
// make sure the data is still queued for processing
assert_eq!(rxq.used.idx.get(), 0);
}

// wait for 100ms to give the rate-limiter timer a chance to replenish
// wait for an extra 50ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(150));
// wait for an extra 100ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(200));

// following RX procedure should succeed because bandwidth should now be available
{
Expand Down Expand Up @@ -1813,8 +1794,8 @@ mod tests {
}

// wait for 100ms to give the rate-limiter timer a chance to replenish
// wait for an extra 50ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(150));
// wait for an extra 100ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(200));

// following TX procedure should succeed because ops should now be available
{
Expand Down Expand Up @@ -1853,7 +1834,7 @@ mod tests {
assert!(h.get_rx_rate_limiter().is_blocked());
assert!(h.rx.deferred_frame);
// assert that no operation actually completed (limiter blocked it)
assert_eq!(h.interrupt_evt.read().unwrap(), 1);
assert_eq!(h.interrupt_evt.read().unwrap(), 2);
// make sure the data is still queued for processing
assert_eq!(rxq.used.idx.get(), 0);

Expand All @@ -1868,8 +1849,8 @@ mod tests {
}

// wait for 100ms to give the rate-limiter timer a chance to replenish
// wait for an extra 50ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(150));
// wait for an extra 100ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(200));

// following RX procedure should succeed because ops should now be available
{
Expand Down Expand Up @@ -1918,4 +1899,28 @@ mod tests {
compare_buckets(h.get_tx_rate_limiter().bandwidth().unwrap(), &tx_bytes);
compare_buckets(h.get_tx_rate_limiter().ops().unwrap(), &tx_ops);
}

#[test]
fn test_tx_queue_interrupt() {
// Regression test for https://github.com/firecracker-microvm/firecracker/issues/1436 .
// Verifies that processing TX used descriptors raises the queue interrupt.
let mem = GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap();
let (mut h, txq, _) = default_test_netepollhandler(&mem, TestMutators::default());

// Place the data buffer past the end of the queue structures so the
// two regions cannot overlap.
let daddr = 0x2000;
assert!(daddr > txq.end().0);

// Do some TX: one available descriptor chain of a single 0x1000-byte buffer.
txq.avail.idx.set(1);
txq.avail.ring[0].set(0);
txq.dtable[0].set(daddr, 0x1000, 0, 0);

// trigger the TX handler
h.tx.queue_evt.write(1).unwrap();
h.handle_event(TX_QUEUE_EVENT, EPOLLIN).unwrap();

// Verify if TX queue was processed.
assert_eq!(txq.used.idx.get(), 1);
// Check if interrupt was triggered: the eventfd must hold exactly one signal.
assert_eq!(h.interrupt_evt.read().unwrap(), 1);
}
}
2 changes: 1 addition & 1 deletion tests/integration_tests/build/test_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import host_tools.cargo_build as host # pylint: disable=import-error

COVERAGE_TARGET_PCT = 85.2
COVERAGE_TARGET_PCT = 85.1
COVERAGE_MAX_DELTA = 0.01

CARGO_KCOV_REL_PATH = os.path.join(host.CARGO_BUILD_REL_PATH, 'kcov')
Expand Down