Skip to content

Commit 3450522

Browse files
HADOOP-17618. ABFS: Partially obfuscate SAS object IDs in Logs (apache#2845)
Contributed by Sumangala Patki
1 parent 44bab51 commit 3450522

File tree

8 files changed

+273
-78
lines changed

8 files changed

+273
-78
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,11 @@ public final class HttpQueryParams {
4141
public static final String QUERY_PARAM_UPN = "upn";
4242
public static final String QUERY_PARAM_BLOBTYPE = "blobtype";
4343

44+
//query params for SAS
45+
public static final String QUERY_PARAM_SAOID = "saoid";
46+
public static final String QUERY_PARAM_SKOID = "skoid";
47+
public static final String QUERY_PARAM_SUOID = "suoid";
48+
public static final String QUERY_PARAM_SIGNATURE = "sig";
49+
4450
private HttpQueryParams() {}
4551
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsRestOperationException.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,15 @@ private static String formatMessage(final AbfsHttpOperation abfsHttpOperation) {
8787
"Operation failed: \"%1$s\", %2$s, HEAD, %3$s",
8888
abfsHttpOperation.getStatusDescription(),
8989
abfsHttpOperation.getStatusCode(),
90-
abfsHttpOperation.getSignatureMaskedUrl());
90+
abfsHttpOperation.getMaskedUrl());
9191
}
9292

9393
return String.format(
9494
"Operation failed: \"%1$s\", %2$s, %3$s, %4$s, %5$s, \"%6$s\"",
9595
abfsHttpOperation.getStatusDescription(),
9696
abfsHttpOperation.getStatusCode(),
9797
abfsHttpOperation.getMethod(),
98-
abfsHttpOperation.getSignatureMaskedUrl(),
98+
abfsHttpOperation.getMaskedUrl(),
9999
abfsHttpOperation.getStorageErrorCode(),
100100
// Remove break line to ensure the request id and timestamp can be shown in console.
101101
abfsHttpOperation.getStorageErrorMessage().replaceAll("\\n", " "));

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java

Lines changed: 27 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,14 @@
2121
import java.io.IOException;
2222
import java.io.InputStream;
2323
import java.io.OutputStream;
24-
import java.io.UnsupportedEncodingException;
2524
import java.net.HttpURLConnection;
2625
import java.net.URL;
27-
import java.net.URLEncoder;
2826
import java.util.List;
2927

3028
import javax.net.ssl.HttpsURLConnection;
3129
import javax.net.ssl.SSLSocketFactory;
3230

31+
import org.apache.hadoop.fs.azurebfs.utils.UriUtils;
3332
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
3433
import org.codehaus.jackson.JsonFactory;
3534
import org.codehaus.jackson.JsonParser;
@@ -50,8 +49,6 @@
5049
public class AbfsHttpOperation implements AbfsPerfLoggable {
5150
private static final Logger LOG = LoggerFactory.getLogger(AbfsHttpOperation.class);
5251

53-
public static final String SIGNATURE_QUERY_PARAM_KEY = "sig=";
54-
5552
private static final int CONNECT_TIMEOUT = 30 * 1000;
5653
private static final int READ_TIMEOUT = 30 * 1000;
5754

@@ -83,6 +80,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable {
8380
private long connectionTimeMs;
8481
private long sendRequestTimeMs;
8582
private long recvResponseTimeMs;
83+
private boolean shouldMask = false;
8684

8785
public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult(
8886
final URL url,
@@ -149,6 +147,10 @@ public String getRequestId() {
149147
return requestId;
150148
}
151149

150+
public void setMaskForSAS() {
151+
shouldMask = true;
152+
}
153+
152154
public int getBytesSent() {
153155
return bytesSent;
154156
}
@@ -193,7 +195,7 @@ public String toString() {
193195
sb.append(",");
194196
sb.append(method);
195197
sb.append(",");
196-
sb.append(getSignatureMaskedUrl());
198+
sb.append(getMaskedUrl());
197199
return sb.toString();
198200
}
199201

@@ -226,11 +228,30 @@ public String getLogString() {
226228
.append(" m=")
227229
.append(method)
228230
.append(" u=")
229-
.append(getSignatureMaskedEncodedUrl());
231+
.append(getMaskedEncodedUrl());
230232

231233
return sb.toString();
232234
}
233235

236+
public String getMaskedUrl() {
237+
if (!shouldMask) {
238+
return url.toString();
239+
}
240+
if (maskedUrl != null) {
241+
return maskedUrl;
242+
}
243+
maskedUrl = UriUtils.getMaskedUrl(url);
244+
return maskedUrl;
245+
}
246+
247+
public String getMaskedEncodedUrl() {
248+
if (maskedEncodedUrl != null) {
249+
return maskedEncodedUrl;
250+
}
251+
maskedEncodedUrl = UriUtils.encodedUrlStr(getMaskedUrl());
252+
return maskedEncodedUrl;
253+
}
254+
234255
/**
235256
* Initializes a new HTTP request and opens the connection.
236257
*
@@ -520,43 +541,6 @@ private boolean isNullInputStream(InputStream stream) {
520541
return stream == null ? true : false;
521542
}
522543

523-
public static String getSignatureMaskedUrl(String url) {
524-
int qpStrIdx = url.indexOf('?' + SIGNATURE_QUERY_PARAM_KEY);
525-
if (qpStrIdx == -1) {
526-
qpStrIdx = url.indexOf('&' + SIGNATURE_QUERY_PARAM_KEY);
527-
}
528-
if (qpStrIdx == -1) {
529-
return url;
530-
}
531-
final int sigStartIdx = qpStrIdx + SIGNATURE_QUERY_PARAM_KEY.length() + 1;
532-
final int ampIdx = url.indexOf("&", sigStartIdx);
533-
final int sigEndIndex = (ampIdx != -1) ? ampIdx : url.length();
534-
String signature = url.substring(sigStartIdx, sigEndIndex);
535-
return url.replace(signature, "XXXX");
536-
}
537-
538-
public static String encodedUrlStr(String url) {
539-
try {
540-
return URLEncoder.encode(url, "UTF-8");
541-
} catch (UnsupportedEncodingException e) {
542-
return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl";
543-
}
544-
}
545-
546-
public String getSignatureMaskedUrl() {
547-
if (this.maskedUrl == null) {
548-
this.maskedUrl = getSignatureMaskedUrl(this.url.toString());
549-
}
550-
return this.maskedUrl;
551-
}
552-
553-
public String getSignatureMaskedEncodedUrl() {
554-
if (this.maskedEncodedUrl == null) {
555-
this.maskedEncodedUrl = encodedUrlStr(getSignatureMaskedUrl());
556-
}
557-
return this.maskedEncodedUrl;
558-
}
559-
560544
public static class AbfsHttpOperationWithFixedResult extends AbfsHttpOperation {
561545
/**
562546
* Creates an instance to represent fixed results.

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ private boolean executeHttpOperation(final int retryCount,
249249
break;
250250
case SAS:
251251
// do nothing; the SAS token should already be appended to the query string
252+
httpOperation.setMaskForSAS(); //mask sig/oid from url for logs
252253
break;
253254
case SharedKey:
254255
// sign the HTTP request

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,42 @@
1818

1919
package org.apache.hadoop.fs.azurebfs.utils;
2020

21+
import java.io.UnsupportedEncodingException;
22+
import java.net.URL;
23+
import java.net.URLEncoder;
24+
import java.nio.charset.StandardCharsets;
25+
import java.util.Arrays;
26+
import java.util.Collections;
27+
import java.util.HashSet;
28+
import java.util.List;
29+
import java.util.Set;
2130
import java.util.regex.Pattern;
2231

32+
import org.apache.commons.lang3.StringUtils;
33+
import org.apache.http.NameValuePair;
34+
import org.apache.http.client.utils.URLEncodedUtils;
35+
36+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AND_MARK;
37+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL;
38+
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SAOID;
39+
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SIGNATURE;
40+
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SKOID;
41+
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SUOID;
42+
2343
/**
2444
* Utility class to help with Abfs url transformation to blob urls.
2545
*/
2646
public final class UriUtils {
2747
private static final String ABFS_URI_REGEX = "[^.]+\\.dfs\\.(preprod\\.){0,1}core\\.windows\\.net";
2848
private static final Pattern ABFS_URI_PATTERN = Pattern.compile(ABFS_URI_REGEX);
49+
private static final Set<String> FULL_MASK_PARAM_KEYS = new HashSet<>(
50+
Collections.singleton(QUERY_PARAM_SIGNATURE));
51+
private static final Set<String> PARTIAL_MASK_PARAM_KEYS = new HashSet<>(
52+
Arrays.asList(QUERY_PARAM_SKOID, QUERY_PARAM_SAOID, QUERY_PARAM_SUOID));
53+
private static final Character CHAR_MASK = 'X';
54+
private static final String FULL_MASK = "XXXXX";
55+
private static final int DEFAULT_QUERY_STRINGBUILDER_CAPACITY = 550;
56+
private static final int PARTIAL_MASK_VISIBLE_LEN = 18;
2957

3058
/**
3159
* Checks whether a string includes abfs url.
@@ -73,6 +101,74 @@ public static String generateUniqueTestPath() {
73101
return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId + "/test";
74102
}
75103

104+
public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
105+
Set<String> queryParamsForFullMask,
106+
Set<String> queryParamsForPartialMask) {
107+
return maskUrlQueryParameters(keyValueList, queryParamsForFullMask,
108+
queryParamsForPartialMask, DEFAULT_QUERY_STRINGBUILDER_CAPACITY);
109+
}
110+
111+
/**
112+
* Generic function to mask a set of query parameters partially/fully and
113+
* return the resultant query string
114+
* @param keyValueList List of NameValuePair instances for query keys/values
115+
* @param queryParamsForFullMask values for these params will appear as "XXXX"
116+
* @param queryParamsForPartialMask values will be masked with 'X', except for
117+
* the last PARTIAL_MASK_VISIBLE_LEN characters
118+
* @param queryLen to initialize StringBuilder for the masked query
119+
* @return the masked url query part
120+
*/
121+
public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
122+
Set<String> queryParamsForFullMask,
123+
Set<String> queryParamsForPartialMask, int queryLen) {
124+
StringBuilder maskedUrl = new StringBuilder(queryLen);
125+
for (NameValuePair keyValuePair : keyValueList) {
126+
String key = keyValuePair.getName();
127+
if (key.isEmpty()) {
128+
throw new IllegalArgumentException("Query param key should not be empty");
129+
}
130+
String value = keyValuePair.getValue();
131+
maskedUrl.append(key);
132+
maskedUrl.append(EQUAL);
133+
if (value != null && !value.isEmpty()) { //no mask
134+
if (queryParamsForFullMask.contains(key)) {
135+
maskedUrl.append(FULL_MASK);
136+
} else if (queryParamsForPartialMask.contains(key)) {
137+
int valueLen = value.length();
138+
int maskedLen = valueLen > PARTIAL_MASK_VISIBLE_LEN
139+
? PARTIAL_MASK_VISIBLE_LEN : valueLen / 2;
140+
maskedUrl.append(value, 0, valueLen - maskedLen);
141+
maskedUrl.append(StringUtils.repeat(CHAR_MASK, maskedLen));
142+
} else {
143+
maskedUrl.append(value);
144+
}
145+
}
146+
maskedUrl.append(AND_MARK);
147+
}
148+
maskedUrl.deleteCharAt(maskedUrl.length() - 1);
149+
return maskedUrl.toString();
150+
}
151+
152+
public static String encodedUrlStr(String url) {
153+
try {
154+
return URLEncoder.encode(url, "UTF-8");
155+
} catch (UnsupportedEncodingException e) {
156+
return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl";
157+
}
158+
}
159+
160+
public static String getMaskedUrl(URL url) {
161+
String queryString = url.getQuery();
162+
if (queryString == null) {
163+
return url.toString();
164+
}
165+
List<NameValuePair> queryKeyValueList = URLEncodedUtils
166+
.parse(queryString, StandardCharsets.UTF_8);
167+
String maskedQueryString = maskUrlQueryParameters(queryKeyValueList,
168+
FULL_MASK_PARAM_KEYS, PARTIAL_MASK_PARAM_KEYS, queryString.length());
169+
return url.toString().replace(queryString, maskedQueryString);
170+
}
171+
76172
private UriUtils() {
77173
}
78174
}

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -403,14 +403,14 @@ public void testSignatureMask() throws Exception {
403403
.renamePath(src, "/testABC" + "/abc.txt", null,
404404
getTestTracingContext(fs, false));
405405
AbfsHttpOperation result = abfsHttpRestOperation.getResult();
406-
String url = result.getSignatureMaskedUrl();
407-
String encodedUrl = result.getSignatureMaskedEncodedUrl();
406+
String url = result.getMaskedUrl();
407+
String encodedUrl = result.getMaskedEncodedUrl();
408408
Assertions.assertThat(url.substring(url.indexOf("sig=")))
409409
.describedAs("Signature query param should be masked")
410-
.startsWith("sig=XXXX");
410+
.startsWith("sig=XXXXX");
411411
Assertions.assertThat(encodedUrl.substring(encodedUrl.indexOf("sig%3D")))
412412
.describedAs("Signature query param should be masked")
413-
.startsWith("sig%3DXXXX");
413+
.startsWith("sig%3DXXXXX");
414414
}
415415

416416
@Test

0 commit comments

Comments
 (0)