Skip to content

Commit 1cb9e74

Browse files
HADOOP-17618. ABFS: Partially obfuscate SAS object IDs in Logs (#2845)
Contributed by Sumangala Patki (cherry picked from commit 3450522)
1 parent 59a955d commit 1cb9e74

File tree

8 files changed

+273
-78
lines changed

8 files changed

+273
-78
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,11 @@ public final class HttpQueryParams {
4141
public static final String QUERY_PARAM_UPN = "upn";
4242
public static final String QUERY_PARAM_BLOBTYPE = "blobtype";
4343

44+
//query params for SAS
45+
public static final String QUERY_PARAM_SAOID = "saoid";
46+
public static final String QUERY_PARAM_SKOID = "skoid";
47+
public static final String QUERY_PARAM_SUOID = "suoid";
48+
public static final String QUERY_PARAM_SIGNATURE = "sig";
49+
4450
private HttpQueryParams() {}
4551
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,15 @@ private static String formatMessage(final AbfsHttpOperation abfsHttpOperation) {
8787
"Operation failed: \"%1$s\", %2$s, HEAD, %3$s",
8888
abfsHttpOperation.getStatusDescription(),
8989
abfsHttpOperation.getStatusCode(),
90-
abfsHttpOperation.getSignatureMaskedUrl());
90+
abfsHttpOperation.getMaskedUrl());
9191
}
9292

9393
return String.format(
9494
"Operation failed: \"%1$s\", %2$s, %3$s, %4$s, %5$s, \"%6$s\"",
9595
abfsHttpOperation.getStatusDescription(),
9696
abfsHttpOperation.getStatusCode(),
9797
abfsHttpOperation.getMethod(),
98-
abfsHttpOperation.getSignatureMaskedUrl(),
98+
abfsHttpOperation.getMaskedUrl(),
9999
abfsHttpOperation.getStorageErrorCode(),
100100
// Remove break line to ensure the request id and timestamp can be shown in console.
101101
abfsHttpOperation.getStorageErrorMessage().replaceAll("\\n", " "));

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java

Lines changed: 27 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,15 @@
2121
import java.io.IOException;
2222
import java.io.InputStream;
2323
import java.io.OutputStream;
24-
import java.io.UnsupportedEncodingException;
2524
import java.net.HttpURLConnection;
2625
import java.net.URL;
27-
import java.net.URLEncoder;
2826
import java.util.List;
2927
import java.util.UUID;
3028

3129
import javax.net.ssl.HttpsURLConnection;
3230
import javax.net.ssl.SSLSocketFactory;
3331

32+
import org.apache.hadoop.fs.azurebfs.utils.UriUtils;
3433
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
3534
import org.codehaus.jackson.JsonFactory;
3635
import org.codehaus.jackson.JsonParser;
@@ -51,8 +50,6 @@
5150
public class AbfsHttpOperation implements AbfsPerfLoggable {
5251
private static final Logger LOG = LoggerFactory.getLogger(AbfsHttpOperation.class);
5352

54-
public static final String SIGNATURE_QUERY_PARAM_KEY = "sig=";
55-
5653
private static final int CONNECT_TIMEOUT = 30 * 1000;
5754
private static final int READ_TIMEOUT = 30 * 1000;
5855

@@ -85,6 +82,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable {
8582
private long connectionTimeMs;
8683
private long sendRequestTimeMs;
8784
private long recvResponseTimeMs;
85+
private boolean shouldMask = false;
8886

8987
public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult(
9088
final URL url,
@@ -150,6 +148,10 @@ public String getRequestId() {
150148
return requestId;
151149
}
152150

151+
public void setMaskForSAS() {
152+
shouldMask = true;
153+
}
154+
153155
public int getBytesSent() {
154156
return bytesSent;
155157
}
@@ -194,7 +196,7 @@ public String toString() {
194196
sb.append(",");
195197
sb.append(method);
196198
sb.append(",");
197-
sb.append(getSignatureMaskedUrl());
199+
sb.append(getMaskedUrl());
198200
return sb.toString();
199201
}
200202

@@ -227,11 +229,30 @@ public String getLogString() {
227229
.append(" m=")
228230
.append(method)
229231
.append(" u=")
230-
.append(getSignatureMaskedEncodedUrl());
232+
.append(getMaskedEncodedUrl());
231233

232234
return sb.toString();
233235
}
234236

237+
public String getMaskedUrl() {
238+
if (!shouldMask) {
239+
return url.toString();
240+
}
241+
if (maskedUrl != null) {
242+
return maskedUrl;
243+
}
244+
maskedUrl = UriUtils.getMaskedUrl(url);
245+
return maskedUrl;
246+
}
247+
248+
public String getMaskedEncodedUrl() {
249+
if (maskedEncodedUrl != null) {
250+
return maskedEncodedUrl;
251+
}
252+
maskedEncodedUrl = UriUtils.encodedUrlStr(getMaskedUrl());
253+
return maskedEncodedUrl;
254+
}
255+
235256
/**
236257
* Initializes a new HTTP request and opens the connection.
237258
*
@@ -521,43 +542,6 @@ private boolean isNullInputStream(InputStream stream) {
521542
return stream == null ? true : false;
522543
}
523544

524-
public static String getSignatureMaskedUrl(String url) {
525-
int qpStrIdx = url.indexOf('?' + SIGNATURE_QUERY_PARAM_KEY);
526-
if (qpStrIdx == -1) {
527-
qpStrIdx = url.indexOf('&' + SIGNATURE_QUERY_PARAM_KEY);
528-
}
529-
if (qpStrIdx == -1) {
530-
return url;
531-
}
532-
final int sigStartIdx = qpStrIdx + SIGNATURE_QUERY_PARAM_KEY.length() + 1;
533-
final int ampIdx = url.indexOf("&", sigStartIdx);
534-
final int sigEndIndex = (ampIdx != -1) ? ampIdx : url.length();
535-
String signature = url.substring(sigStartIdx, sigEndIndex);
536-
return url.replace(signature, "XXXX");
537-
}
538-
539-
public static String encodedUrlStr(String url) {
540-
try {
541-
return URLEncoder.encode(url, "UTF-8");
542-
} catch (UnsupportedEncodingException e) {
543-
return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl";
544-
}
545-
}
546-
547-
public String getSignatureMaskedUrl() {
548-
if (this.maskedUrl == null) {
549-
this.maskedUrl = getSignatureMaskedUrl(this.url.toString());
550-
}
551-
return this.maskedUrl;
552-
}
553-
554-
public String getSignatureMaskedEncodedUrl() {
555-
if (this.maskedEncodedUrl == null) {
556-
this.maskedEncodedUrl = encodedUrlStr(getSignatureMaskedUrl());
557-
}
558-
return this.maskedEncodedUrl;
559-
}
560-
561545
public static class AbfsHttpOperationWithFixedResult extends AbfsHttpOperation {
562546
/**
563547
* Creates an instance to represent fixed results.

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ private boolean executeHttpOperation(final int retryCount) throws AzureBlobFileS
242242
break;
243243
case SAS:
244244
// do nothing; the SAS token should already be appended to the query string
245+
httpOperation.setMaskForSAS(); //mask sig/oid from url for logs
245246
break;
246247
case SharedKey:
247248
// sign the HTTP request

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,42 @@
1818

1919
package org.apache.hadoop.fs.azurebfs.utils;
2020

21+
import java.io.UnsupportedEncodingException;
22+
import java.net.URL;
23+
import java.net.URLEncoder;
24+
import java.nio.charset.StandardCharsets;
25+
import java.util.Arrays;
26+
import java.util.Collections;
27+
import java.util.HashSet;
28+
import java.util.List;
29+
import java.util.Set;
2130
import java.util.regex.Pattern;
2231

32+
import org.apache.commons.lang3.StringUtils;
33+
import org.apache.http.NameValuePair;
34+
import org.apache.http.client.utils.URLEncodedUtils;
35+
36+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AND_MARK;
37+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL;
38+
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SAOID;
39+
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SIGNATURE;
40+
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SKOID;
41+
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SUOID;
42+
2343
/**
2444
* Utility class to help with Abfs url transformation to blob urls.
2545
*/
2646
public final class UriUtils {
2747
private static final String ABFS_URI_REGEX = "[^.]+\\.dfs\\.(preprod\\.){0,1}core\\.windows\\.net";
2848
private static final Pattern ABFS_URI_PATTERN = Pattern.compile(ABFS_URI_REGEX);
49+
private static final Set<String> FULL_MASK_PARAM_KEYS = new HashSet<>(
50+
Collections.singleton(QUERY_PARAM_SIGNATURE));
51+
private static final Set<String> PARTIAL_MASK_PARAM_KEYS = new HashSet<>(
52+
Arrays.asList(QUERY_PARAM_SKOID, QUERY_PARAM_SAOID, QUERY_PARAM_SUOID));
53+
private static final Character CHAR_MASK = 'X';
54+
private static final String FULL_MASK = "XXXXX";
55+
private static final int DEFAULT_QUERY_STRINGBUILDER_CAPACITY = 550;
56+
private static final int PARTIAL_MASK_VISIBLE_LEN = 18;
2957

3058
/**
3159
* Checks whether a string includes abfs url.
@@ -73,6 +101,74 @@ public static String generateUniqueTestPath() {
73101
return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId + "/test";
74102
}
75103

104+
public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
105+
Set<String> queryParamsForFullMask,
106+
Set<String> queryParamsForPartialMask) {
107+
return maskUrlQueryParameters(keyValueList, queryParamsForFullMask,
108+
queryParamsForPartialMask, DEFAULT_QUERY_STRINGBUILDER_CAPACITY);
109+
}
110+
111+
/**
112+
* Generic function to mask a set of query parameters partially/fully and
113+
* return the resultant query string
114+
* @param keyValueList List of NameValuePair instances for query keys/values
115+
* @param queryParamsForFullMask values for these params will appear as "XXXX"
116+
* @param queryParamsForPartialMask values will be masked with 'X', except for
117+
* the last PARTIAL_MASK_VISIBLE_LEN characters
118+
* @param queryLen to initialize StringBuilder for the masked query
119+
* @return the masked url query part
120+
*/
121+
public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
122+
Set<String> queryParamsForFullMask,
123+
Set<String> queryParamsForPartialMask, int queryLen) {
124+
StringBuilder maskedUrl = new StringBuilder(queryLen);
125+
for (NameValuePair keyValuePair : keyValueList) {
126+
String key = keyValuePair.getName();
127+
if (key.isEmpty()) {
128+
throw new IllegalArgumentException("Query param key should not be empty");
129+
}
130+
String value = keyValuePair.getValue();
131+
maskedUrl.append(key);
132+
maskedUrl.append(EQUAL);
133+
if (value != null && !value.isEmpty()) { //no mask
134+
if (queryParamsForFullMask.contains(key)) {
135+
maskedUrl.append(FULL_MASK);
136+
} else if (queryParamsForPartialMask.contains(key)) {
137+
int valueLen = value.length();
138+
int maskedLen = valueLen > PARTIAL_MASK_VISIBLE_LEN
139+
? PARTIAL_MASK_VISIBLE_LEN : valueLen / 2;
140+
maskedUrl.append(value, 0, valueLen - maskedLen);
141+
maskedUrl.append(StringUtils.repeat(CHAR_MASK, maskedLen));
142+
} else {
143+
maskedUrl.append(value);
144+
}
145+
}
146+
maskedUrl.append(AND_MARK);
147+
}
148+
maskedUrl.deleteCharAt(maskedUrl.length() - 1);
149+
return maskedUrl.toString();
150+
}
151+
152+
public static String encodedUrlStr(String url) {
153+
try {
154+
return URLEncoder.encode(url, "UTF-8");
155+
} catch (UnsupportedEncodingException e) {
156+
return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl";
157+
}
158+
}
159+
160+
public static String getMaskedUrl(URL url) {
161+
String queryString = url.getQuery();
162+
if (queryString == null) {
163+
return url.toString();
164+
}
165+
List<NameValuePair> queryKeyValueList = URLEncodedUtils
166+
.parse(queryString, StandardCharsets.UTF_8);
167+
String maskedQueryString = maskUrlQueryParameters(queryKeyValueList,
168+
FULL_MASK_PARAM_KEYS, PARTIAL_MASK_PARAM_KEYS, queryString.length());
169+
return url.toString().replace(queryString, maskedQueryString);
170+
}
171+
76172
private UriUtils() {
77173
}
78174
}

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -400,14 +400,14 @@ public void testSignatureMask() throws Exception {
400400
AbfsRestOperation abfsHttpRestOperation = fs.getAbfsClient()
401401
.renamePath(src, "/testABC" + "/abc.txt", null);
402402
AbfsHttpOperation result = abfsHttpRestOperation.getResult();
403-
String url = result.getSignatureMaskedUrl();
404-
String encodedUrl = result.getSignatureMaskedEncodedUrl();
403+
String url = result.getMaskedUrl();
404+
String encodedUrl = result.getMaskedEncodedUrl();
405405
Assertions.assertThat(url.substring(url.indexOf("sig=")))
406406
.describedAs("Signature query param should be masked")
407-
.startsWith("sig=XXXX");
407+
.startsWith("sig=XXXXX");
408408
Assertions.assertThat(encodedUrl.substring(encodedUrl.indexOf("sig%3D")))
409409
.describedAs("Signature query param should be masked")
410-
.startsWith("sig%3DXXXX");
410+
.startsWith("sig%3DXXXXX");
411411
}
412412

413413
@Test

0 commit comments

Comments
 (0)