diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java
index a474651f..b8316d95 100644
--- a/src/main/java/com/github/packageurl/PackageURL.java
+++ b/src/main/java/com/github/packageurl/PackageURL.java
@@ -21,6 +21,9 @@
*/
package com.github.packageurl;
+import static com.github.packageurl.internal.StringUtil.FRAGMENTCHAR;
+import static com.github.packageurl.internal.StringUtil.PCHAR;
+import static com.github.packageurl.internal.StringUtil.QUERYCHAR;
import static java.util.Objects.requireNonNull;
import com.github.packageurl.internal.StringUtil;
@@ -28,6 +31,7 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
+import java.util.BitSet;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
@@ -451,6 +455,7 @@ private static String validateName(final String type, final String value) throws
validateKey(key);
validateValue(key, entry.getValue());
}
+
return values;
}
@@ -531,12 +536,12 @@ private String canonicalize(boolean coordinatesOnly) {
final StringBuilder purl = new StringBuilder();
purl.append(SCHEME_PART).append(type).append('/');
if (namespace != null) {
- purl.append(encodePath(namespace));
+ purl.append(encodePath(namespace, PCHAR));
purl.append('/');
}
- purl.append(StringUtil.percentEncode(name));
+ purl.append(StringUtil.percentEncode(name, PCHAR));
if (version != null) {
- purl.append('@').append(StringUtil.percentEncode(version));
+ purl.append('@').append(StringUtil.percentEncode(version, PCHAR));
}
if (!coordinatesOnly) {
@@ -550,12 +555,12 @@ private String canonicalize(boolean coordinatesOnly) {
}
purl.append(entry.getKey());
purl.append('=');
- purl.append(StringUtil.percentEncode(entry.getValue()));
+ purl.append(StringUtil.percentEncode(entry.getValue(), QUERYCHAR));
separator = true;
}
}
if (subpath != null) {
- purl.append('#').append(encodePath(subpath));
+ purl.append('#').append(encodePath(subpath, FRAGMENTCHAR));
}
}
return purl.toString();
@@ -584,7 +589,7 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac
}
try {
- final TreeMap results = qualifiers.entrySet().stream()
+ final Map results = qualifiers.entrySet().stream()
.filter(entry -> !isEmpty(entry.getValue()))
.collect(
TreeMap::new,
@@ -596,8 +601,7 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac
}
}
- @SuppressWarnings("StringSplitter") // reason: surprising behavior is okay in this case
- private static @Nullable Map parseQualifiers(final String encodedString)
+ static @Nullable Map parseQualifiers(final String encodedString)
throws MalformedPackageURLException {
try {
final TreeMap results = Arrays.stream(encodedString.split("&"))
@@ -628,8 +632,10 @@ private static String[] parsePath(final String path, final boolean isSubpath) {
.toArray(String[]::new);
}
- private static String encodePath(final String path) {
- return Arrays.stream(path.split("/")).map(StringUtil::percentEncode).collect(Collectors.joining("/"));
+ /**
+  * Percent-encodes each {@code /}-separated segment of a path and joins the encoded
+  * segments back together with {@code /}.
+  *
+  * @param path the decoded path to encode
+  * @param unreservedChars the character set handed to {@link StringUtil#percentEncode} as
+  *     the characters that do not need encoding
+  * @return the encoded path
+  */
+ private static String encodePath(final String path, BitSet unreservedChars) {
+     final StringBuilder encoded = new StringBuilder();
+     String delimiter = "";
+     // String.split discards trailing empty segments, so a trailing '/' is not reproduced.
+     for (final String segment : path.split("/")) {
+         encoded.append(delimiter).append(StringUtil.percentEncode(segment, unreservedChars));
+         delimiter = "/";
+     }
+     return encoded.toString();
+ }
/**
diff --git a/src/main/java/com/github/packageurl/internal/StringUtil.java b/src/main/java/com/github/packageurl/internal/StringUtil.java
index 5225ce1d..5f1ada91 100644
--- a/src/main/java/com/github/packageurl/internal/StringUtil.java
+++ b/src/main/java/com/github/packageurl/internal/StringUtil.java
@@ -25,6 +25,8 @@
import com.github.packageurl.ValidationException;
import java.nio.charset.StandardCharsets;
+import java.util.BitSet;
+import java.util.stream.IntStream;
import org.jspecify.annotations.NonNull;
/**
@@ -33,25 +35,110 @@
* @since 2.0.0
*/
public final class StringUtil {
-
private static final byte PERCENT_CHAR = '%';
- private static final boolean[] UNRESERVED_CHARS = new boolean[128];
+ /** Initial size, in bits, of the character-class bit sets in this class (indices 0-127). */
+ private static final int NBITS = 128;
+
+ /** The decimal digits {@code '0'} to {@code '9'}. */
+ private static final BitSet DIGIT = new BitSet(NBITS);
static {
- for (char c = '0'; c <= '9'; c++) {
- UNRESERVED_CHARS[c] = true;
- }
- for (char c = 'A'; c <= 'Z'; c++) {
- UNRESERVED_CHARS[c] = true;
- }
- for (char c = 'a'; c <= 'z'; c++) {
- UNRESERVED_CHARS[c] = true;
- }
- UNRESERVED_CHARS['-'] = true;
- UNRESERVED_CHARS['.'] = true;
- UNRESERVED_CHARS['_'] = true;
- UNRESERVED_CHARS['~'] = true;
+ IntStream.rangeClosed('0', '9').forEach(DIGIT::set);
+ }
+
+ /** The lower-case letters {@code 'a'} to {@code 'z'}. */
+ private static final BitSet LOWER = new BitSet(NBITS);
+
+ static {
+     // BitSet.set(from, to) treats the upper bound as exclusive, hence the + 1.
+     LOWER.set('a', 'z' + 1);
+ }
+
+ /** The upper-case letters {@code 'A'} to {@code 'Z'}. */
+ private static final BitSet UPPER = new BitSet(NBITS);
+
+ static {
+     UPPER.set('A', 'Z' + 1);
+ }
+
+ /** Union of {@link #LOWER} and {@link #UPPER}. */
+ private static final BitSet ALPHA = new BitSet(NBITS);
+
+ static {
+     ALPHA.or(UPPER);
+     ALPHA.or(LOWER);
+ }
+
+ /** Union of {@link #ALPHA} and {@link #DIGIT}. */
+ private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);
+
+ static {
+     ALPHA_DIGIT.or(DIGIT);
+     ALPHA_DIGIT.or(ALPHA);
+ }
+
+ /** Letters, digits and the four marks {@code - . _ ~} (the RFC 3986 {@code unreserved} set). */
+ private static final BitSet UNRESERVED = new BitSet(NBITS);
+
+ static {
+     UNRESERVED.or(ALPHA_DIGIT);
+     "-._~".chars().forEach(UNRESERVED::set);
+ }
+
+ /** The RFC 3986 {@code gen-delims} characters: {@code : / ? # [ ] @}. */
+ private static final BitSet GEN_DELIMS = new BitSet(NBITS);
+
+ static {
+     ":/?#[]@".chars().forEach(GEN_DELIMS::set);
+ }
+
+ /** The RFC 3986 {@code sub-delims} characters: {@code ! $ & ' ( ) * + , ; =}. */
+ private static final BitSet SUB_DELIMS = new BitSet(NBITS);
+
+ static {
+     "!$&'()*+,;=".chars().forEach(SUB_DELIMS::set);
+ }
+
+ /**
+  * Character set passed to {@link #percentEncode} for the namespace, name and version of a
+  * purl: the unreserved characters, the sub-delimiters other than {@code '&'}, and {@code ':'}.
+  *
+  * <p>Unlike the RFC 3986 {@code pchar} rule, this set contains neither {@code '@'} nor
+  * {@code '&'}. The bit set is mutable; callers must not modify it.
+  */
+ public static final BitSet PCHAR = new BitSet(NBITS);
+
+ static {
+     PCHAR.set(':');
+     PCHAR.or(UNRESERVED);
+     PCHAR.or(SUB_DELIMS);
+     // NOTE(review): '&' is an RFC 3986 sub-delim but is removed again here, so it is
+     // percent-encoded in namespace, name and version. The rationale is not recorded;
+     // confirm against the purl specification.
+     PCHAR.clear('&');
+ }
+
+ /**
+  * Character set passed to {@link #percentEncode} for qualifier values: everything in
+  * {@link #PCHAR} plus the general delimiters, except {@code '#'}, {@code '&'} and {@code '='}.
+  *
+  * <p>The bit set is mutable; callers must not modify it.
+  */
+ public static final BitSet QUERYCHAR = new BitSet(NBITS);
+
+ static {
+     QUERYCHAR.or(PCHAR);
+     // Contributes '/', '?', '@', '[' and ']' ('#' is removed again just below).
+     // NOTE(review): RFC 3986 does not allow '[' and ']' unencoded in a query; presumably
+     // they are kept to agree with java.net.URI, which the tests compare against - confirm.
+     QUERYCHAR.or(GEN_DELIMS);
+     "#&=".chars().forEach(QUERYCHAR::clear);
+ }
+
+ /**
+  * Character set passed to {@link #percentEncode} for the subpath: everything in
+  * {@link #PCHAR} plus {@code '&'} and the general delimiters, except {@code '#'}.
+  *
+  * <p>The bit set is mutable; callers must not modify it.
+  */
+ public static final BitSet FRAGMENTCHAR = new BitSet(NBITS);
+
+ static {
+     FRAGMENTCHAR.or(PCHAR);
+     // Contributes '/', '?', '@', '[' and ']' ('#' is removed again just below).
+     // NOTE(review): RFC 3986 does not allow '[' and ']' unencoded in a fragment - confirm
+     // that leaving them unencoded is intended.
+     FRAGMENTCHAR.or(GEN_DELIMS);
+     FRAGMENTCHAR.set('&');
+     FRAGMENTCHAR.clear('#');
}
private StringUtil() {
@@ -121,8 +208,8 @@ private StringUtil() {
*
* @since 2.0.0
*/
- public static @NonNull String percentEncode(@NonNull final String source) {
- if (!shouldEncode(source)) {
+ public static @NonNull String percentEncode(@NonNull final String source, BitSet unreservedChars) {
+ if (!shouldEncode(source, unreservedChars)) {
return source;
}
@@ -131,7 +218,7 @@ private StringUtil() {
int writePos = 0;
for (byte b : src) {
- if (shouldEncode(toUnsignedInt(b))) {
+ if (shouldEncode(toUnsignedInt(b), unreservedChars)) {
dest[writePos++] = PERCENT_CHAR;
dest[writePos++] = toHexDigit(b >> 4);
dest[writePos++] = toHexDigit(b);
@@ -191,20 +278,24 @@ private static byte toHexDigit(int b) {
*
* @param c non-negative integer.
*/
- private static boolean isUnreserved(int c) {
- return c < 128 && UNRESERVED_CHARS[c];
+ private static boolean isUnreserved(int c, BitSet unreservedChars) {
+     // BitSet.get answers false for any index past the highest set bit, so the only
+     // input needing an explicit guard is a negative one (get would throw for it).
+     return c >= 0 && unreservedChars.get(c);
}
/**
+ * Tells whether {@code c} has to be percent-encoded, which is the case exactly when
+ * {@link #isUnreserved(int, BitSet)} rejects it.
+ *
* @param c non-negative integer
+ * @param unreservedChars the characters that do not need to be encoded
+ * @return {@code true} if {@code c} is not a member of {@code unreservedChars}
*/
- private static boolean shouldEncode(int c) {
- return !isUnreserved(c);
+ private static boolean shouldEncode(int c, BitSet unreservedChars) {
+ return !isUnreserved(c, unreservedChars);
}
- private static boolean shouldEncode(String s) {
+ private static boolean shouldEncode(String s, BitSet unreservedChars) {
for (int i = 0, length = s.length(); i < length; i++) {
- if (shouldEncode(s.charAt(i))) {
+ if (shouldEncode(s.charAt(i), unreservedChars)) {
return true;
}
}
diff --git a/src/test/java/com/github/packageurl/PackageURLTest.java b/src/test/java/com/github/packageurl/PackageURLTest.java
index 71d42eaa..5cb25a6b 100644
--- a/src/test/java/com/github/packageurl/PackageURLTest.java
+++ b/src/test/java/com/github/packageurl/PackageURLTest.java
@@ -28,7 +28,12 @@
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Arrays;
import java.util.Locale;
+import java.util.Map;
+import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.jspecify.annotations.Nullable;
import org.junit.jupiter.api.AfterAll;
@@ -272,4 +277,39 @@ void npmCaseSensitive() throws Exception {
assertEquals("Base64", base64Uppercase.getName());
assertEquals("1.0.0", base64Uppercase.getVersion());
}
+
+ /**
+  * Builds a {@link URI} whose path, query and fragment contain the RFC 3986 delimiter
+  * characters, parses its ASCII form as a {@link PackageURL}, and checks that the result
+  * equals a purl assembled with {@link PackageURLBuilder} and that its components and
+  * canonical string agree with the URI.
+  */
+ @Test
+ void uriEncode() throws URISyntaxException, MalformedPackageURLException {
+ String genDelims = "?#[]@"; // gen-delims minus ':' and '/', which are added when pchar is built below
+ String subDelims = "!$&'()*+,;=";
+ String pchar = "/" + genDelims + subDelims + ":";
+ // '=' and '&' inside the value are pre-encoded so that they are not read as separators.
+ String query = "key=" + pchar.replace("=", "%3D").replace("&", "%26") + "/?";
+ String fragment = pchar + "/?";
+ String scheme = "pkg";
+ String type = "generic";
+ String subpath = fragment.replaceFirst("^/+", ""); // drop the leading slash(es)
+ // Five-argument constructor: (scheme, authority, path, query, fragment); the purl type
+ // is passed in the authority position.
+ URI uri = new URI(scheme, type, pchar, query, subpath);
+ PackageURL purl = new PackageURL(uri.toASCIIString());
+ Map qualifiers = Arrays.stream(query.split("&"))
+ .map(kv -> kv.split("="))
+ .filter(kvArray -> kvArray.length == 2)
+ .collect(Collectors.toMap(kv -> kv[0], kv -> kv[1]));
+ // The same purl assembled by hand: the part of the path before '@' is the name and the
+ // part after it is the version.
+ PackageURL purl2 = PackageURLBuilder.aPackageURL()
+ .withType(type)
+ .withNamespace("")
+ .withName(genDelims.replace("@", ""))
+ .withVersion(subDelims + ":")
+ .withQualifiers(qualifiers)
+ .withSubpath(subpath)
+ .build();
+ assertEquals(purl, purl2);
+ assertEquals(
+ uri.getQuery(),
+ purl.getQualifiers().entrySet().stream()
+ .map(Map.Entry::toString)
+ .collect(Collectors.joining("&")));
+ assertEquals(uri.getFragment(), purl.getSubpath());
+ assertEquals(uri.getPath(), "/" + purl.getName() + '@' + purl.getVersion());
+ // Expected canonical form: "pkg:" without the "//", and the first '&' percent-encoded.
+ assertEquals(uri.toASCIIString().replace("pkg://", "pkg:").replaceFirst("&", "%26"), purl.toString());
+ }
}
diff --git a/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java b/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java
index e05b5349..678ac5b6 100644
--- a/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java
+++ b/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java
@@ -21,6 +21,8 @@
*/
package com.github.packageurl.internal;
+import static com.github.packageurl.internal.StringUtil.PCHAR;
+
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Random;
@@ -92,7 +94,7 @@ private String[] createDecodedData() {
private static String[] encodeData(String[] decodedData) {
String[] encodedData = new String[decodedData.length];
for (int i = 0; i < encodedData.length; i++) {
- encodedData[i] = StringUtil.percentEncode(decodedData[i]);
+ encodedData[i] = StringUtil.percentEncode(decodedData[i], PCHAR);
if (!StringUtil.percentDecode(encodedData[i]).equals(decodedData[i])) {
throw new RuntimeException(
"Invalid implementation of `percentEncode` and `percentDecode`.\nOriginal data: "
@@ -139,7 +141,7 @@ public void percentDecode(final Blackhole blackhole) {
+ /**
+  * Encodes the first {@code DATA_COUNT} entries of {@code decodedData} with the
+  * {@code PCHAR} character set, handing every result to the {@link Blackhole}.
+  */
@Benchmark
public void percentEncode(final Blackhole blackhole) {
for (int i = 0; i < DATA_COUNT; i++) {
- blackhole.consume(StringUtil.percentEncode(decodedData[i]));
+ blackhole.consume(StringUtil.percentEncode(decodedData[i], PCHAR));
}
}
}
diff --git a/src/test/resources/test-suite-data.json b/src/test/resources/test-suite-data.json
index 2eb9b3b9..826ba491 100644
--- a/src/test/resources/test-suite-data.json
+++ b/src/test/resources/test-suite-data.json
@@ -86,7 +86,7 @@
{
"description": "docker uses qualifiers and hash image id as versions",
"purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
- "canonical_purl": "pkg:docker/customer/dockerimage@sha256%3A244fd47e07d1004f0aed9c?repository_url=gcr.io",
+ "canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
"type": "docker",
"namespace": "customer",
"name": "dockerimage",
@@ -110,7 +110,7 @@
{
"description": "maven often uses qualifiers",
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&classifier=sources",
- "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io%2Frelease",
+ "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io/release",
"type": "maven",
"namespace": "org.apache.xmlgraphics",
"name": "batik-anim",
@@ -122,7 +122,7 @@
{
"description": "maven pom reference",
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&extension=pom",
- "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io%2Frelease",
+ "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io/release",
"type": "maven",
"namespace": "org.apache.xmlgraphics",
"name": "batik-anim",
@@ -314,7 +314,7 @@
{
"description": "valid debian purl containing a plus in the name and version",
"purl": "pkg:deb/debian/g++-10@10.2.1+6",
- "canonical_purl": "pkg:deb/debian/g%2B%2B-10@10.2.1%2B6",
+ "canonical_purl": "pkg:deb/debian/g++-10@10.2.1+6",
"type": "deb",
"namespace": "debian",
"name": "g++-10",