From 9ee9198897b09fc38f7e951f51c14843e6420fe5 Mon Sep 17 00:00:00 2001 From: David Walluck Date: Thu, 20 Mar 2025 11:06:20 -0400 Subject: [PATCH] fix: don't encode ':' or '/' as part of the canonical representation This makes the Java canonical representation match the majority of other implementations. Fixes #122 Fixes #92 --- .../com/github/packageurl/PackageURL.java | 147 ++++++++++++++++-- .../com/github/packageurl/PackageURLTest.java | 40 +++++ src/test/resources/test-suite-data.json | 8 +- 3 files changed, 175 insertions(+), 20 deletions(-) diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index a7adc5b0..ca6cec40 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -29,6 +29,7 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.BitSet; import java.util.Collections; import java.util.Map; import java.util.Objects; @@ -60,6 +61,110 @@ public final class PackageURL implements Serializable { private static final char PERCENT_CHAR = '%'; + private static final int NBITS = 128; + + private static final BitSet DIGIT = new BitSet(NBITS); + + static { + IntStream.rangeClosed('0', '9').forEach(DIGIT::set); + } + + private static final BitSet LOWER = new BitSet(NBITS); + + static { + IntStream.rangeClosed('a', 'z').forEach(LOWER::set); + } + + private static final BitSet UPPER = new BitSet(NBITS); + + static { + IntStream.rangeClosed('A', 'Z').forEach(UPPER::set); + } + + private static final BitSet ALPHA = new BitSet(NBITS); + + static { + ALPHA.or(LOWER); + ALPHA.or(UPPER); + } + + private static final BitSet ALPHA_DIGIT = new BitSet(NBITS); + + static { + ALPHA_DIGIT.or(ALPHA); + ALPHA_DIGIT.or(DIGIT); + } + + private static final BitSet UNRESERVED = new BitSet(NBITS); + + static { + UNRESERVED.or(ALPHA_DIGIT); + UNRESERVED.set('-'); + UNRESERVED.set('.'); + UNRESERVED.set('_'); + UNRESERVED.set('~'); + } + + private static final BitSet GEN_DELIMS = new BitSet(NBITS); + + static { + GEN_DELIMS.set(':'); + GEN_DELIMS.set('/'); + GEN_DELIMS.set('?'); + GEN_DELIMS.set('#'); + GEN_DELIMS.set('['); + GEN_DELIMS.set(']'); + GEN_DELIMS.set('@'); + } + + private static final BitSet SUB_DELIMS = new BitSet(NBITS); + + static { + SUB_DELIMS.set('!'); + SUB_DELIMS.set('$'); + SUB_DELIMS.set('&'); + SUB_DELIMS.set('\''); + SUB_DELIMS.set('('); + SUB_DELIMS.set(')'); + SUB_DELIMS.set('*'); + SUB_DELIMS.set('+'); + SUB_DELIMS.set(','); + SUB_DELIMS.set(';'); + SUB_DELIMS.set('='); + } + + private static final BitSet PCHAR = new BitSet(NBITS); + + static { + PCHAR.or(UNRESERVED); + PCHAR.or(SUB_DELIMS); + PCHAR.set(':'); + PCHAR.clear('&'); // XXX: Why? + } + + private static final BitSet QUERY = new BitSet(NBITS); + + static { + QUERY.or(GEN_DELIMS); + QUERY.or(PCHAR); + QUERY.set('/'); + QUERY.set('?'); + QUERY.clear('#'); + QUERY.clear('&'); + QUERY.clear('='); + } + + private static final BitSet FRAGMENT = new BitSet(NBITS); + + static { + FRAGMENT.or(GEN_DELIMS); + FRAGMENT.or(PCHAR); + FRAGMENT.set('/'); + FRAGMENT.set('?'); + FRAGMENT.set('&'); + FRAGMENT.clear('#'); + } + /** * The PackageURL scheme constant */ @@ -128,7 +233,7 @@ public PackageURL(final String purl) throws MalformedPackageURLException { * @since 1.0.0 */ public PackageURL(final String type, final String name) throws MalformedPackageURLException { - this(type, null, name, null, null, null); + this(type, null, name, null, (Map) null, null); } /** @@ -419,6 +524,7 @@ private static String validateName(final String type, final String value) throws validateKey(key); validateValue(key, entry.getValue()); } + return values; } @@ -500,12 +606,12 @@ private String canonicalize(boolean coordinatesOnly) { final StringBuilder purl = new StringBuilder(); purl.append(SCHEME_PART).append(type).append('/'); if (namespace != null) { - purl.append(encodePath(namespace)); + purl.append(encodePath(namespace, PCHAR)); purl.append('/'); } - purl.append(percentEncode(name)); + purl.append(percentEncode(name, PCHAR)); if (version != null) { - purl.append('@').append(percentEncode(version)); + purl.append('@').append(percentEncode(version, PCHAR)); } if (!coordinatesOnly) { @@ -519,23 +625,27 @@ private String canonicalize(boolean coordinatesOnly) { } purl.append(entry.getKey()); purl.append('='); - purl.append(percentEncode(entry.getValue())); + purl.append(percentEncode(entry.getValue(), QUERY)); separator = true; } } if (subpath != null) { - purl.append('#').append(encodePath(subpath)); + purl.append('#').append(encodePath(subpath, FRAGMENT)); } } return purl.toString(); } - private static boolean isUnreserved(int c) { - return (isValidCharForKey(c) || c == '~'); + private static boolean isUnreserved(int c, BitSet safe) { + if (c < 0 || c >= NBITS) { + return false; + } + + return safe.get(c); } - private static boolean shouldEncode(int c) { - return !isUnreserved(c); + private static boolean shouldEncode(int c, BitSet safe) { + return !isUnreserved(c, safe); } private static boolean isAlpha(int c) { @@ -666,7 +776,11 @@ private static boolean isPercent(int c) { return (c == PERCENT_CHAR); } - private static String percentEncode(final String source) { + static String percentEncode(final String source) { + return percentEncode(source, UNRESERVED); + } + + private static String percentEncode(final String source, final BitSet safe) { if (source.isEmpty()) { return source; } @@ -677,7 +791,7 @@ private static String percentEncode(final String source) { boolean changed = false; for (byte b : bytes) { - if (shouldEncode(b)) { + if (shouldEncode(b, safe)) { changed = true; byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)); @@ -813,8 +927,7 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac } } - @SuppressWarnings("StringSplitter") // reason: surprising behavior is okay in this case - private static @Nullable Map parseQualifiers(final String encodedString) + static @Nullable Map parseQualifiers(final String encodedString) throws MalformedPackageURLException { try { final TreeMap results = Arrays.stream(encodedString.split("&")) @@ -845,8 +958,10 @@ private static String[] parsePath(final String path, final boolean isSubpath) { .toArray(String[]::new); } - private static String encodePath(final String path) { - return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/")); + private String encodePath(final String path, BitSet safe) { + return Arrays.stream(path.split("/")) + .map(source -> percentEncode(source, safe)) + .collect(Collectors.joining("/")); } /** diff --git a/src/test/java/com/github/packageurl/PackageURLTest.java b/src/test/java/com/github/packageurl/PackageURLTest.java index 9dd58ee5..7616169b 100644 --- a/src/test/java/com/github/packageurl/PackageURLTest.java +++ b/src/test/java/com/github/packageurl/PackageURLTest.java @@ -28,7 +28,12 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; import java.util.Locale; +import java.util.Map; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.jspecify.annotations.Nullable; import org.junit.jupiter.api.AfterAll; @@ -278,4 +283,39 @@ void npmCaseSensitive() throws Exception { assertEquals("Base64", base64Uppercase.getName()); assertEquals("1.0.0", base64Uppercase.getVersion()); } + + @Test + void uriEncode() throws URISyntaxException, MalformedPackageURLException { + String genDelims = "?#[]@"; // / + String subDelims = "!$&'()*+,;="; + String pchar = "/" + genDelims + subDelims + ":"; + String query = "key=" + pchar.replace("=", "%3D").replace("&", "%26") + "/?"; + String fragment = pchar + "/?"; + String scheme = "pkg"; + String type = "generic"; + String subpath = fragment.replaceFirst("^/+", ""); + URI uri = new URI(scheme, type, pchar, query, subpath); + PackageURL purl = new PackageURL(uri.toASCIIString()); + Map qualifiers = Arrays.stream(query.split("&")) + .map(kv -> kv.split("=")) + .filter(kvArray -> kvArray.length == 2) + .collect(Collectors.toMap(kv -> kv[0], kv -> kv[1])); + PackageURL purl2 = PackageURLBuilder.aPackageURL() + .withType(type) + .withNamespace("") + .withName(genDelims.replace("@", "")) + .withVersion(subDelims + ":") + .withQualifiers(qualifiers) + .withSubpath(subpath) + .build(); + assertEquals(purl, purl2); + assertEquals( + uri.getQuery(), + purl.getQualifiers().entrySet().stream() + .map(Map.Entry::toString) + .collect(Collectors.joining("&"))); + assertEquals(uri.getFragment(), purl.getSubpath()); + assertEquals(uri.getPath(), "/" + purl.getName() + '@' + purl.getVersion()); + assertEquals(uri.toASCIIString().replace("pkg://", "pkg:").replaceFirst("&", "%26"), purl.toString()); + } } diff --git a/src/test/resources/test-suite-data.json b/src/test/resources/test-suite-data.json index 2eb9b3b9..826ba491 100644 --- a/src/test/resources/test-suite-data.json +++ b/src/test/resources/test-suite-data.json @@ -86,7 +86,7 @@ { "description": "docker uses qualifiers and hash image id as versions", "purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io", - "canonical_purl": "pkg:docker/customer/dockerimage@sha256%3A244fd47e07d1004f0aed9c?repository_url=gcr.io", + "canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io", "type": "docker", "namespace": "customer", "name": "dockerimage", @@ -110,7 +110,7 @@ { "description": "maven often uses qualifiers", "purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&classifier=sources", - "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io%2Frelease", + "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io/release", "type": "maven", "namespace": "org.apache.xmlgraphics", "name": "batik-anim", @@ -122,7 +122,7 @@ { "description": "maven pom reference", "purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&extension=pom", - "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io%2Frelease", + "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io/release", "type": "maven", "namespace": "org.apache.xmlgraphics", "name": "batik-anim", @@ -314,7 +314,7 @@ { "description": "valid debian purl containing a plus in the name and version", "purl": "pkg:deb/debian/g++-10@10.2.1+6", - "canonical_purl": "pkg:deb/debian/g%2B%2B-10@10.2.1%2B6", + "canonical_purl": "pkg:deb/debian/g++-10@10.2.1+6", "type": "deb", "namespace": "debian", "name": "g++-10",