diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index a474651f..b8316d95 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -21,6 +21,9 @@ */ package com.github.packageurl; +import static com.github.packageurl.internal.StringUtil.FRAGMENTCHAR; +import static com.github.packageurl.internal.StringUtil.PCHAR; +import static com.github.packageurl.internal.StringUtil.QUERYCHAR; import static java.util.Objects.requireNonNull; import com.github.packageurl.internal.StringUtil; @@ -28,6 +31,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.Arrays; +import java.util.BitSet; import java.util.Collections; import java.util.Map; import java.util.Objects; @@ -451,6 +455,7 @@ private static String validateName(final String type, final String value) throws validateKey(key); validateValue(key, entry.getValue()); } + return values; } @@ -531,12 +536,12 @@ private String canonicalize(boolean coordinatesOnly) { final StringBuilder purl = new StringBuilder(); purl.append(SCHEME_PART).append(type).append('/'); if (namespace != null) { - purl.append(encodePath(namespace)); + purl.append(encodePath(namespace, PCHAR)); purl.append('/'); } - purl.append(StringUtil.percentEncode(name)); + purl.append(StringUtil.percentEncode(name, PCHAR)); if (version != null) { - purl.append('@').append(StringUtil.percentEncode(version)); + purl.append('@').append(StringUtil.percentEncode(version, PCHAR)); } if (!coordinatesOnly) { @@ -550,12 +555,12 @@ private String canonicalize(boolean coordinatesOnly) { } purl.append(entry.getKey()); purl.append('='); - purl.append(StringUtil.percentEncode(entry.getValue())); + purl.append(StringUtil.percentEncode(entry.getValue(), QUERYCHAR)); separator = true; } } if (subpath != null) { - purl.append('#').append(encodePath(subpath)); + purl.append('#').append(encodePath(subpath, FRAGMENTCHAR)); } } return purl.toString(); @@ -584,7 +589,7 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac } try { - final TreeMap results = qualifiers.entrySet().stream() + final Map results = qualifiers.entrySet().stream() .filter(entry -> !isEmpty(entry.getValue())) .collect( TreeMap::new, @@ -596,8 +601,7 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac } } - @SuppressWarnings("StringSplitter") // reason: surprising behavior is okay in this case - private static @Nullable Map parseQualifiers(final String encodedString) + static @Nullable Map parseQualifiers(final String encodedString) throws MalformedPackageURLException { try { final TreeMap results = Arrays.stream(encodedString.split("&")) @@ -628,8 +632,10 @@ private static String[] parsePath(final String path, final boolean isSubpath) { .toArray(String[]::new); } - private static String encodePath(final String path) { - return Arrays.stream(path.split("/")).map(StringUtil::percentEncode).collect(Collectors.joining("/")); + private static String encodePath(final String path, BitSet unreservedChars) { + return Arrays.stream(path.split("/")) + .map(segment -> StringUtil.percentEncode(segment, unreservedChars)) + .collect(Collectors.joining("/")); } /** diff --git a/src/main/java/com/github/packageurl/internal/StringUtil.java b/src/main/java/com/github/packageurl/internal/StringUtil.java index 5225ce1d..5f1ada91 100644 --- a/src/main/java/com/github/packageurl/internal/StringUtil.java +++ b/src/main/java/com/github/packageurl/internal/StringUtil.java @@ -25,6 +25,8 @@ import com.github.packageurl.ValidationException; import java.nio.charset.StandardCharsets; +import java.util.BitSet; +import java.util.stream.IntStream; import org.jspecify.annotations.NonNull; /** @@ -33,25 +35,110 @@ * @since 2.0.0 */ public final class StringUtil { - private static final byte PERCENT_CHAR = '%'; - private static final boolean[] UNRESERVED_CHARS = new boolean[128]; + private static final int NBITS = 128; + + private static final BitSet DIGIT = new BitSet(NBITS); static { - for (char c = '0'; c <= '9'; c++) { - UNRESERVED_CHARS[c] = true; - } - for (char c = 'A'; c <= 'Z'; c++) { - UNRESERVED_CHARS[c] = true; - } - for (char c = 'a'; c <= 'z'; c++) { - UNRESERVED_CHARS[c] = true; - } - UNRESERVED_CHARS['-'] = true; - UNRESERVED_CHARS['.'] = true; - UNRESERVED_CHARS['_'] = true; - UNRESERVED_CHARS['~'] = true; + IntStream.rangeClosed('0', '9').forEach(DIGIT::set); + } + + private static final BitSet LOWER = new BitSet(NBITS); + + static { + IntStream.rangeClosed('a', 'z').forEach(LOWER::set); + } + + private static final BitSet UPPER = new BitSet(NBITS); + + static { + IntStream.rangeClosed('A', 'Z').forEach(UPPER::set); + } + + private static final BitSet ALPHA = new BitSet(NBITS); + + static { + ALPHA.or(LOWER); + ALPHA.or(UPPER); + } + + private static final BitSet ALPHA_DIGIT = new BitSet(NBITS); + + static { + ALPHA_DIGIT.or(ALPHA); + ALPHA_DIGIT.or(DIGIT); + } + + private static final BitSet UNRESERVED = new BitSet(NBITS); + + static { + UNRESERVED.or(ALPHA_DIGIT); + UNRESERVED.set('-'); + UNRESERVED.set('.'); + UNRESERVED.set('_'); + UNRESERVED.set('~'); + } + + private static final BitSet GEN_DELIMS = new BitSet(NBITS); + + static { + GEN_DELIMS.set(':'); + GEN_DELIMS.set('/'); + GEN_DELIMS.set('?'); + GEN_DELIMS.set('#'); + GEN_DELIMS.set('['); + GEN_DELIMS.set(']'); + GEN_DELIMS.set('@'); + } + + private static final BitSet SUB_DELIMS = new BitSet(NBITS); + + static { + SUB_DELIMS.set('!'); + SUB_DELIMS.set('$'); + SUB_DELIMS.set('&'); + SUB_DELIMS.set('\''); + SUB_DELIMS.set('('); + SUB_DELIMS.set(')'); + SUB_DELIMS.set('*'); + SUB_DELIMS.set('+'); + SUB_DELIMS.set(','); + SUB_DELIMS.set(';'); + SUB_DELIMS.set('='); + } + + public static final BitSet PCHAR = new BitSet(NBITS); + + static { + PCHAR.or(UNRESERVED); + PCHAR.or(SUB_DELIMS); + PCHAR.set(':'); + PCHAR.clear('&'); // XXX: Why? + } + + public static final BitSet QUERYCHAR = new BitSet(NBITS); + + static { + QUERYCHAR.or(GEN_DELIMS); + QUERYCHAR.or(PCHAR); + QUERYCHAR.set('/'); + QUERYCHAR.set('?'); + QUERYCHAR.clear('#'); + QUERYCHAR.clear('&'); + QUERYCHAR.clear('='); + } + + public static final BitSet FRAGMENTCHAR = new BitSet(NBITS); + + static { + FRAGMENTCHAR.or(GEN_DELIMS); + FRAGMENTCHAR.or(PCHAR); + FRAGMENTCHAR.set('/'); + FRAGMENTCHAR.set('?'); + FRAGMENTCHAR.set('&'); + FRAGMENTCHAR.clear('#'); } private StringUtil() { @@ -121,8 +208,8 @@ private StringUtil() { * * @since 2.0.0 */ - public static @NonNull String percentEncode(@NonNull final String source) { - if (!shouldEncode(source)) { + public static @NonNull String percentEncode(@NonNull final String source, BitSet unreservedChars) { + if (!shouldEncode(source, unreservedChars)) { return source; } @@ -131,7 +218,7 @@ private StringUtil() { int writePos = 0; for (byte b : src) { - if (shouldEncode(toUnsignedInt(b))) { + if (shouldEncode(toUnsignedInt(b), unreservedChars)) { dest[writePos++] = PERCENT_CHAR; dest[writePos++] = toHexDigit(b >> 4); dest[writePos++] = toHexDigit(b); @@ -191,20 +278,24 @@ private static byte toHexDigit(int b) { *

* @param c non-negative integer. */ - private static boolean isUnreserved(int c) { - return c < 128 && UNRESERVED_CHARS[c]; + private static boolean isUnreserved(int c, BitSet unreservedChars) { + if (c < 0 || c >= unreservedChars.length()) { + return false; + } + + return unreservedChars.get(c); } /** * @param c non-negative integer */ - private static boolean shouldEncode(int c) { - return !isUnreserved(c); + private static boolean shouldEncode(int c, BitSet unreservedChars) { + return !isUnreserved(c, unreservedChars); } - private static boolean shouldEncode(String s) { + private static boolean shouldEncode(String s, BitSet unreservedChars) { for (int i = 0, length = s.length(); i < length; i++) { - if (shouldEncode(s.charAt(i))) { + if (shouldEncode(s.charAt(i), unreservedChars)) { return true; } } diff --git a/src/test/java/com/github/packageurl/PackageURLTest.java b/src/test/java/com/github/packageurl/PackageURLTest.java index 71d42eaa..5cb25a6b 100644 --- a/src/test/java/com/github/packageurl/PackageURLTest.java +++ b/src/test/java/com/github/packageurl/PackageURLTest.java @@ -28,7 +28,12 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; import java.util.Locale; +import java.util.Map; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.jspecify.annotations.Nullable; import org.junit.jupiter.api.AfterAll; @@ -272,4 +277,39 @@ void npmCaseSensitive() throws Exception { assertEquals("Base64", base64Uppercase.getName()); assertEquals("1.0.0", base64Uppercase.getVersion()); } + + @Test + void uriEncode() throws URISyntaxException, MalformedPackageURLException { + String genDelims = "?#[]@"; // / + String subDelims = "!$&'()*+,;="; + String pchar = "/" + genDelims + subDelims + ":"; + String query = "key=" + pchar.replace("=", "%3D").replace("&", "%26") + "/?"; + String fragment = pchar + "/?"; + String scheme = "pkg"; + String type = "generic"; + String subpath = fragment.replaceFirst("^/+", ""); + URI uri = new URI(scheme, type, pchar, query, subpath); + PackageURL purl = new PackageURL(uri.toASCIIString()); + Map qualifiers = Arrays.stream(query.split("&")) + .map(kv -> kv.split("=")) + .filter(kvArray -> kvArray.length == 2) + .collect(Collectors.toMap(kv -> kv[0], kv -> kv[1])); + PackageURL purl2 = PackageURLBuilder.aPackageURL() + .withType(type) + .withNamespace("") + .withName(genDelims.replace("@", "")) + .withVersion(subDelims + ":") + .withQualifiers(qualifiers) + .withSubpath(subpath) + .build(); + assertEquals(purl, purl2); + assertEquals( + uri.getQuery(), + purl.getQualifiers().entrySet().stream() + .map(Map.Entry::toString) + .collect(Collectors.joining("&"))); + assertEquals(uri.getFragment(), purl.getSubpath()); + assertEquals(uri.getPath(), "/" + purl.getName() + '@' + purl.getVersion()); + assertEquals(uri.toASCIIString().replace("pkg://", "pkg:").replaceFirst("&", "%26"), purl.toString()); + } } diff --git a/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java b/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java index e05b5349..678ac5b6 100644 --- a/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java +++ b/src/test/java/com/github/packageurl/internal/StringUtilBenchmark.java @@ -21,6 +21,8 @@ */ package com.github.packageurl.internal; +import static com.github.packageurl.internal.StringUtil.PCHAR; + import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.Random; @@ -92,7 +94,7 @@ private String[] createDecodedData() { private static String[] encodeData(String[] decodedData) { String[] encodedData = new String[decodedData.length]; for (int i = 0; i < encodedData.length; i++) { - encodedData[i] = StringUtil.percentEncode(decodedData[i]); + encodedData[i] = StringUtil.percentEncode(decodedData[i], PCHAR); if (!StringUtil.percentDecode(encodedData[i]).equals(decodedData[i])) { throw new RuntimeException( "Invalid implementation of `percentEncode` and `percentDecode`.\nOriginal data: " @@ -139,7 +141,7 @@ public void percentDecode(final Blackhole blackhole) { @Benchmark public void percentEncode(final Blackhole blackhole) { for (int i = 0; i < DATA_COUNT; i++) { - blackhole.consume(StringUtil.percentEncode(decodedData[i])); + blackhole.consume(StringUtil.percentEncode(decodedData[i], PCHAR)); } } } diff --git a/src/test/resources/test-suite-data.json b/src/test/resources/test-suite-data.json index 2eb9b3b9..826ba491 100644 --- a/src/test/resources/test-suite-data.json +++ b/src/test/resources/test-suite-data.json @@ -86,7 +86,7 @@ { "description": "docker uses qualifiers and hash image id as versions", "purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io", - "canonical_purl": "pkg:docker/customer/dockerimage@sha256%3A244fd47e07d1004f0aed9c?repository_url=gcr.io", + "canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io", "type": "docker", "namespace": "customer", "name": "dockerimage", @@ -110,7 +110,7 @@ { "description": "maven often uses qualifiers", "purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&classifier=sources", - "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io%2Frelease", + "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io/release", "type": "maven", "namespace": "org.apache.xmlgraphics", "name": "batik-anim", @@ -122,7 +122,7 @@ { "description": "maven pom reference", "purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&extension=pom", - "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io%2Frelease", + "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io/release", "type": "maven", "namespace": "org.apache.xmlgraphics", "name": "batik-anim", @@ -314,7 +314,7 @@ { "description": "valid debian purl containing a plus in the name and version", "purl": "pkg:deb/debian/g++-10@10.2.1+6", - "canonical_purl": "pkg:deb/debian/g%2B%2B-10@10.2.1%2B6", + "canonical_purl": "pkg:deb/debian/g++-10@10.2.1+6", "type": "deb", "namespace": "debian", "name": "g++-10",