diff --git a/CHANGELOG.md b/CHANGELOG.md index 51050c7e7b0..c9d4bf5a6ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ - The backtrace are printed on `panic`, no longer causing a seccomp fault. - Fixed #1375 - Change logger options type from Value to Vec to prevent potential unwrap on None panics. +- Raise interrupt for TX queue used descriptors - Github issue #1436 +- Fixed a bug that causes 100% cpu load when the net device rx is throttled + by the ratelimiter - Github issue #1439 ## [0.19.0] diff --git a/src/devices/src/virtio/net.rs b/src/devices/src/virtio/net.rs index 5ab37042696..d3ed52a0fac 100644 --- a/src/devices/src/virtio/net.rs +++ b/src/devices/src/virtio/net.rs @@ -151,9 +151,6 @@ pub struct NetEpollHandler { acked_features: u64, mmds_ns: Option, guest_mac: Option, - epoll_fd: RawFd, - rx_tap_listening: bool, - rx_tap_epoll_token: u64, #[cfg(test)] test_mutators: tests::TestMutators, @@ -397,6 +394,7 @@ impl NetEpollHandler { // trigger a process_rx() which checks if there are any new frames to be sent, starting // with the MMDS network stack. let mut process_rx_for_mmds = false; + let mut raise_irq = false; while let Some(head) = self.tx.queue.pop(&self.mem) { // If limiter.consume() fails it means there is no more TokenType::Ops @@ -474,6 +472,11 @@ impl NetEpollHandler { } self.tx.queue.add_used(&self.mem, head_index, 0); + raise_irq = true; + } + + if raise_irq { + self.signal_used_queue()?; } // An incoming frame for the MMDS may trigger the transmission of a new message. @@ -500,28 +503,6 @@ impl NetEpollHandler { fn read_tap(&mut self) -> io::Result { self.tap.read(&mut self.rx.frame_buf) } - - fn register_tap_rx_listener(&mut self) -> std::result::Result<(), std::io::Error> { - epoll::ctl( - self.epoll_fd, - epoll::ControlOptions::EPOLL_CTL_ADD, - self.tap.as_raw_fd(), - epoll::Event::new(epoll::Events::EPOLLIN, self.rx_tap_epoll_token), - )?; - self.rx_tap_listening = true; - Ok(()) - } - - fn unregister_tap_rx_listener(&mut self) -> std::result::Result<(), std::io::Error> { - epoll::ctl( - self.epoll_fd, - epoll::ControlOptions::EPOLL_CTL_DEL, - self.tap.as_raw_fd(), - epoll::Event::new(epoll::Events::EPOLLIN, self.rx_tap_epoll_token), - )?; - self.rx_tap_listening = false; - Ok(()) - } } impl EpollHandler for NetEpollHandler { @@ -541,10 +522,6 @@ impl EpollHandler for NetEpollHandler { underlying: e, }) } else { - if !self.rx_tap_listening { - self.register_tap_rx_listener() - .map_err(DeviceError::IoError)?; - } // If the limiter is not blocked, resume the receiving of bytes. if !self.rx.rate_limiter.is_blocked() { // There should be a buffer available now to receive the frame into. @@ -558,8 +535,6 @@ impl EpollHandler for NetEpollHandler { METRICS.net.rx_tap_event_count.inc(); if self.rx.queue.is_empty(&self.mem) { - self.unregister_tap_rx_listener() - .map_err(DeviceError::IoError)?; return Err(DeviceError::NoAvailBuffers); } @@ -821,6 +796,8 @@ impl VirtioDevice for Net { } else { None }; + let tap_fd = tap.as_raw_fd(); + let handler = NetEpollHandler { rx: RxVirtio::new( rx_queue, @@ -839,9 +816,6 @@ impl VirtioDevice for Net { acked_features: self.acked_features, mmds_ns, guest_mac: self.guest_mac(), - epoll_fd: self.epoll_config.epoll_raw_fd, - rx_tap_listening: false, - rx_tap_epoll_token: self.epoll_config.rx_tap_token, #[cfg(test)] test_mutators: tests::TestMutators::default(), @@ -861,6 +835,20 @@ impl VirtioDevice for Net { //TODO: barrier needed here maybe? + epoll::ctl( + self.epoll_config.epoll_raw_fd, + epoll::ControlOptions::EPOLL_CTL_ADD, + tap_fd, + epoll::Event::new( + epoll::Events::EPOLLIN | epoll::Events::EPOLLET, + self.epoll_config.rx_tap_token, + ), + ) + .map_err(|e| { + METRICS.net.activate_fails.inc(); + ActivateError::EpollCtl(e) + })?; + epoll::ctl( self.epoll_config.epoll_raw_fd, epoll::ControlOptions::EPOLL_CTL_ADD, @@ -1124,7 +1112,6 @@ mod tests { let interrupt_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let rx_queue_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let tx_queue_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let epoll_fd = epoll::create(true).unwrap(); ( NetEpollHandler { @@ -1138,9 +1125,6 @@ mod tests { mmds_ns: Some(MmdsNetworkStack::new_with_defaults()), test_mutators, guest_mac: None, - epoll_fd, - rx_tap_epoll_token: 0, - rx_tap_listening: false, }, txq, rxq, @@ -1476,15 +1460,12 @@ mod tests { }; let mem = GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap(); let (mut h, _txq, rxq) = default_test_netepollhandler(&mem, test_mutators); - h.register_tap_rx_listener().unwrap(); // The RX queue is empty. match h.handle_event(RX_TAP_EVENT, epoll::Events::EPOLLIN) { Err(DeviceError::NoAvailBuffers) => (), _ => panic!("invalid"), } - // Since the RX was empty, we shouldn't be listening for tap RX events. - assert!(!h.rx_tap_listening); // Fake an avail buffer; this time, tap reading should error out. rxq.avail.idx.set(1); @@ -1599,7 +1580,7 @@ mod tests { h.interrupt_evt.write(1).unwrap(); h.handle_event(RX_TAP_EVENT, EPOLLIN).unwrap(); assert!(h.rx.deferred_frame); - assert_eq!(h.interrupt_evt.read().unwrap(), 2); + assert_eq!(h.interrupt_evt.read().unwrap(), 3); // The #cfg(test) enabled version of read_tap always returns 1234 bytes (or the len of // the buffer, whichever is smaller). assert_eq!(rxq.used.ring[0].get().len, 1234); @@ -1705,8 +1686,8 @@ mod tests { } // wait for 100ms to give the rate-limiter timer a chance to replenish - // wait for an extra 50ms to make sure the timerfd event makes its way from the kernel - thread::sleep(Duration::from_millis(150)); + // wait for an extra 100ms to make sure the timerfd event makes its way from the kernel + thread::sleep(Duration::from_millis(200)); // following TX procedure should succeed because bandwidth should now be available { @@ -1750,14 +1731,14 @@ mod tests { assert!(h.get_rx_rate_limiter().is_blocked()); assert!(h.rx.deferred_frame); // assert that no operation actually completed (limiter blocked it) - assert_eq!(h.interrupt_evt.read().unwrap(), 1); + assert_eq!(h.interrupt_evt.read().unwrap(), 2); // make sure the data is still queued for processing assert_eq!(rxq.used.idx.get(), 0); } // wait for 100ms to give the rate-limiter timer a chance to replenish - // wait for an extra 50ms to make sure the timerfd event makes its way from the kernel - thread::sleep(Duration::from_millis(150)); + // wait for an extra 100ms to make sure the timerfd event makes its way from the kernel + thread::sleep(Duration::from_millis(200)); // following RX procedure should succeed because bandwidth should now be available { @@ -1813,8 +1794,8 @@ mod tests { } // wait for 100ms to give the rate-limiter timer a chance to replenish - // wait for an extra 50ms to make sure the timerfd event makes its way from the kernel - thread::sleep(Duration::from_millis(150)); + // wait for an extra 100ms to make sure the timerfd event makes its way from the kernel + thread::sleep(Duration::from_millis(200)); // following TX procedure should succeed because ops should now be available { @@ -1853,7 +1834,7 @@ mod tests { assert!(h.get_rx_rate_limiter().is_blocked()); assert!(h.rx.deferred_frame); // assert that no operation actually completed (limiter blocked it) - assert_eq!(h.interrupt_evt.read().unwrap(), 1); + assert_eq!(h.interrupt_evt.read().unwrap(), 2); // make sure the data is still queued for processing assert_eq!(rxq.used.idx.get(), 0); @@ -1868,8 +1849,8 @@ mod tests { } // wait for 100ms to give the rate-limiter timer a chance to replenish - // wait for an extra 50ms to make sure the timerfd event makes its way from the kernel - thread::sleep(Duration::from_millis(150)); + // wait for an extra 100ms to make sure the timerfd event makes its way from the kernel + thread::sleep(Duration::from_millis(200)); // following RX procedure should succeed because ops should now be available { @@ -1918,4 +1899,28 @@ mod tests { compare_buckets(h.get_tx_rate_limiter().bandwidth().unwrap(), &tx_bytes); compare_buckets(h.get_tx_rate_limiter().ops().unwrap(), &tx_ops); } + + #[test] + fn test_tx_queue_interrupt() { + // Regression test for https://github.com/firecracker-microvm/firecracker/issues/1436 . + let mem = GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap(); + let (mut h, txq, _) = default_test_netepollhandler(&mem, TestMutators::default()); + + let daddr = 0x2000; + assert!(daddr > txq.end().0); + + // Do some TX. + txq.avail.idx.set(1); + txq.avail.ring[0].set(0); + txq.dtable[0].set(daddr, 0x1000, 0, 0); + + // trigger the TX handler + h.tx.queue_evt.write(1).unwrap(); + h.handle_event(TX_QUEUE_EVENT, EPOLLIN).unwrap(); + + // Verify if TX queue was processed. + assert_eq!(txq.used.idx.get(), 1); + // Check if interrupt was triggered. + assert_eq!(h.interrupt_evt.read().unwrap(), 1); + } } diff --git a/tests/integration_tests/build/test_coverage.py b/tests/integration_tests/build/test_coverage.py index 37012d1835b..3dab8a0fc30 100644 --- a/tests/integration_tests/build/test_coverage.py +++ b/tests/integration_tests/build/test_coverage.py @@ -19,7 +19,7 @@ import host_tools.cargo_build as host # pylint: disable=import-error -COVERAGE_TARGET_PCT = 85.2 +COVERAGE_TARGET_PCT = 85.1 COVERAGE_MAX_DELTA = 0.01 CARGO_KCOV_REL_PATH = os.path.join(host.CARGO_BUILD_REL_PATH, 'kcov')