use std::sync::Arc;

use assert_matches::assert_matches;
use scylla::client::session::Session;
use scylla::client::session_builder::SessionBuilder;
use scylla::errors::{ExecutionError, RequestAttemptError, SchemaAgreementError};
use scylla::policies::load_balancing::{NodeIdentifier, SingleTargetLoadBalancingPolicy};
use scylla::response::query_result::QueryResult;
use scylla::statement::Statement;
use scylla_proxy::{
    Condition, ProxyError, Reaction, RequestOpcode, RequestReaction, RequestRule, RunningProxy,
    ShardAwareness, WorkerError,
};

use crate::utils::{
    calculate_proxy_host_ids, setup_tracing, test_with_3_node_cluster, unique_keyspace_name,
};

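/// Creates a keyspace via `coordinator` while the `paused` proxy node drops
/// any connection on which the driver tries to read `system.local`, i.e.
/// while that node looks unreachable to the schema-agreement check.
/// The rule is lifted and the keyspace dropped before the result is returned.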
async fn run_some_ddl_with_unreachable_node(
    coordinator: NodeIdentifier,
    paused: usize,
    session: &Session,
    running_proxy: &mut RunningProxy,
) -> Result<QueryResult, ExecutionError> {
    // Prevent fetching the schema version from the paused node.
    // This simulates a node that became unreachable after our DDL completed,
    // but whose pool in the driver is not yet `Broken`.
    running_proxy.running_nodes[paused].change_request_rules(Some(vec![RequestRule(
        Condition::and(
            Condition::not(Condition::ConnectionRegisteredAnyEvent),
            Condition::and(
                Condition::RequestOpcode(RequestOpcode::Query),
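                // The schema version lives in `system.local`, so matching on that
                // substring intercepts exactly the schema-version reads. The control
                // connection, which registers for events, is excluded above.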
                Condition::BodyContainsCaseSensitive(Box::new(*b"system.local")),
            ),
        ),
        // Dropping the connection simulates the driver discovering that the node is unreachable.
        RequestReaction::drop_connection(),
    )]));

    let ks = unique_keyspace_name();
    let mut request = Statement::new(format!("CREATE KEYSPACE {ks} WITH REPLICATION = {{'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1}}"));
    request.set_load_balancing_policy(Some(SingleTargetLoadBalancingPolicy::new(
        coordinator,
        None,
    )));

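    // Run the DDL; the driver awaits schema agreement automatically after
    // receiving the response, which is where the rule above kicks in.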
    let result = session.query_unpaged(request, &[]).await;

    // Cleanup: lift the rule so that dropping the keyspace succeeds.
    running_proxy.running_nodes[paused].change_request_rules(Some(vec![]));
    session
        .query_unpaged(format!("DROP KEYSPACE {ks}"), &[])
        .await
        .unwrap();

    result
}

// Verifies that auto schema agreement (performed after receiving the response
// to a DDL request) works correctly when a node is paused.
#[tokio::test]
#[cfg_attr(scylla_cloud_tests, ignore)]
async fn test_schema_await_with_paused_node() {
    setup_tracing();

    let res = test_with_3_node_cluster(
        ShardAwareness::QueryNode,
        |proxy_uris, translation_map, mut running_proxy| async move {
            // DB preparation phase
            let session: Session = SessionBuilder::new()
                .known_node(proxy_uris[0].as_str())
                .address_translator(Arc::new(translation_map.clone()))
                .build()
                .await
                .unwrap();

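            // Determine the host ID behind each proxy URI, so that the cases
            // below can target specific nodes by `HostId`.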
            let host_ids = calculate_proxy_host_ids(&proxy_uris, &translation_map, &session);

            {
                // Case 1: The paused node is the coordinator for the DDL.
                // The DDL needs to fail, with a broken-connection error surfaced
                // during schema agreement.
                let result = run_some_ddl_with_unreachable_node(
                    NodeIdentifier::HostId(host_ids[1]),
                    1,
                    &session,
                    &mut running_proxy,
                )
                .await;
                assert_matches!(
                    result,
                    Err(ExecutionError::SchemaAgreementError(
                        SchemaAgreementError::RequestError(
                            RequestAttemptError::BrokenConnectionError(_)
                        )
                    ))
                )
            }

            {
                // Case 2: The paused node is NOT the coordinator for the DDL.
                // The DDL should succeed, because auto schema agreement only needs
                // the available nodes to agree.
                let result = run_some_ddl_with_unreachable_node(
                    NodeIdentifier::HostId(host_ids[2]),
                    1,
                    &session,
                    &mut running_proxy,
                )
                .await;
                assert_matches!(result, Ok(_))
            }

            {
                // Case 3: The paused node is the coordinator for the DDL and also
                // serves the control connection. Same as case 1, except for the
                // control connection, so the DDL needs to fail.
                let result = run_some_ddl_with_unreachable_node(
                    NodeIdentifier::HostId(host_ids[0]),
                    0,
                    &session,
                    &mut running_proxy,
                )
                .await;
                assert_matches!(
                    result,
                    Err(ExecutionError::SchemaAgreementError(
                        SchemaAgreementError::RequestError(
                            RequestAttemptError::BrokenConnectionError(_)
                        )
                    ))
                )
            }

            {
                // Case 4: The paused node is NOT the coordinator for the DDL, but it
                // serves the control connection. Same as case 2, except for the
                // control connection. The DDL should succeed, because auto schema
                // agreement only needs the available nodes to agree, and the control
                // connection plays no part in it.
                let result = run_some_ddl_with_unreachable_node(
                    NodeIdentifier::HostId(host_ids[1]),
                    0,
                    &session,
                    &mut running_proxy,
                )
                .await;
                assert_matches!(result, Ok(_))
            }

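            // Hand the proxy back to the harness so it can be shut down cleanly.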
            running_proxy
        },
    )
    .await;

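    // A `DriverDisconnected` worker error is tolerated here: the proxy dropped
    // driver connections on purpose while the rules were active.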
    match res {
        Ok(()) => (),
        Err(ProxyError::Worker(WorkerError::DriverDisconnected(_))) => (),
        Err(err) => panic!("{}", err),
    }
}