Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 69 additions & 8 deletions FileInspectorX.Tests/DetectorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,9 @@ public void Detect_Text_Html() {
}

[Fact]
public void Detect_Docx_ByZipRefinement() {
var path = Path.GetTempFileName();
var docx = path + ".zip"; // name does not matter; content does
public void Detect_Docx_ByZipRefinement() {
var path = Path.GetTempFileName();
var docx = path + ".zip"; // name does not matter; content does
try {
using (var fs = File.Create(docx))
using (var za = new ZipArchive(fs, ZipArchiveMode.Create, leaveOpen: true)) {
Expand All @@ -228,11 +228,72 @@ public void Detect_Docx_ByZipRefinement() {
Assert.Equal("docx", res!.Extension);
Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", res.MimeType);
Assert.Equal("High", res.Confidence);
} finally { if (File.Exists(path)) File.Delete(path); if (File.Exists(docx)) File.Delete(docx); }
}

[Fact]
public void Detect_Tar_ByUstar() {
} finally { if (File.Exists(path)) File.Delete(path); if (File.Exists(docx)) File.Delete(docx); }
}

[Fact]
public void Detect_Apk_ByZipSubtype_UsesSubtypeMime_And_DeclaredComparison()
{
    // Arrange: a uniquely named temp file with an .apk suffix.
    var apkPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.apk");
    try
    {
        // Write a ZIP holding only the two (empty) entries this test uses as APK markers.
        // The using blocks must close before detection so the archive is flushed to disk.
        using (var output = File.Create(apkPath))
        using (var archive = new ZipArchive(output, ZipArchiveMode.Create, leaveOpen: true))
        {
            foreach (var entryName in new[] { "AndroidManifest.xml", "classes.dex" })
            {
                archive.CreateEntry(entryName);
            }
        }

        // Act
        var detection = FI.Detect(apkPath);

        // Assert: the base extension stays on the ZIP container, while the guessed
        // extension and the MIME type are expected to carry the APK subtype.
        Assert.NotNull(detection);
        Assert.Equal("zip", detection!.Extension);
        Assert.Equal("apk", detection.GuessedExtension);
        Assert.Equal("application/vnd.android.package-archive", detection.MimeType);

        // A declared ".apk" must not be reported as a mismatch against that detection.
        var comparison = FI.CompareDeclaredDetailed(".apk", detection);
        Assert.False(comparison.Mismatch);
        Assert.Equal("apk", comparison.DetectedExtension);
    }
    finally
    {
        // Always remove the temp artifact, even when an assertion fails.
        if (File.Exists(apkPath))
        {
            File.Delete(apkPath);
        }
    }
}

[Fact]
public void Detect_Jar_ByZipSubtype_UsesSubtypeMime_And_DeclaredComparison()
{
    // Arrange: a uniquely named temp file with a .jar suffix.
    var jarFileName = $"{Guid.NewGuid():N}.jar";
    var jarPath = Path.Combine(Path.GetTempPath(), jarFileName);
    try
    {
        // Write a ZIP with an (empty) manifest entry and one (empty) class entry.
        // The using blocks must close before detection so the archive is flushed to disk.
        using (var output = File.Create(jarPath))
        using (var archive = new ZipArchive(output, ZipArchiveMode.Create, leaveOpen: true))
        {
            archive.CreateEntry("META-INF/MANIFEST.MF");
            archive.CreateEntry("com/example/App.class");
        }

        // Act
        var detection = FI.Detect(jarPath);

        // Assert: container extension stays "zip"; guess and MIME reflect the JAR subtype.
        Assert.NotNull(detection);
        Assert.Equal("zip", detection!.Extension);
        Assert.Equal("jar", detection.GuessedExtension);
        Assert.Equal("application/java-archive", detection.MimeType);

        // A declared ".jar" must not be reported as a mismatch against that detection.
        var comparison = FI.CompareDeclaredDetailed(".jar", detection);
        Assert.False(comparison.Mismatch);
        Assert.Equal("jar", comparison.DetectedExtension);
    }
    finally
    {
        // Always remove the temp artifact, even when an assertion fails.
        if (File.Exists(jarPath))
        {
            File.Delete(jarPath);
        }
    }
}

[Fact]
public void Detect_Tar_ByUstar() {
var tar = Path.GetTempFileName();
try {
var buf = new byte[600];
Expand Down
40 changes: 40 additions & 0 deletions FileInspectorX.Tests/FriendlyNamesTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,44 @@ public void GetTypeLabel_Returns_Cabinet_Label()

Assert.Equal("Windows cabinet archive", label);
}

[Fact]
public void GetTypeLabel_Returns_Apk_Label_From_Zip_Subtype()
{
    // Arrange: a ZIP-based detection whose MIME and guessed extension point at APK,
    // paired with an analysis that reports the container subtype as "apk".
    var zipDetection = new ContentTypeDetectionResult
    {
        Extension = "zip",
        MimeType = "application/vnd.android.package-archive",
        GuessedExtension = "apk"
    };
    var analysis = new FileAnalysis { ContainerSubtype = "apk" };

    // Act
    var actualLabel = FriendlyNames.GetTypeLabel(zipDetection, analysis);

    // Assert: the subtype label is expected rather than a generic ZIP label.
    Assert.Equal("Android package (APK)", actualLabel);
}

[Theory]
[InlineData("parquet", "Apache Parquet data file")]
[InlineData("pcapng", "Packet capture (PCAPNG)")]
[InlineData("wasm", "WebAssembly module")]
[InlineData("heic", "HEIC image")]
[InlineData("png", "PNG image")]
[InlineData("p7b", "PKCS#7 certificate bundle")]
[InlineData("mp4", "MPEG-4 video")]
[InlineData("xz", "XZ compressed file")]
public void GetTypeLabel_Returns_Friendly_Label_For_Additional_Specialized_Types(string extension, string expected)
{
    // Arrange: every case uses the same generic MIME type and an empty analysis;
    // only the extension varies between cases.
    var genericDetection = new ContentTypeDetectionResult
    {
        Extension = extension,
        MimeType = "application/octet-stream"
    };
    var emptyAnalysis = new FileAnalysis();

    // Act
    var actualLabel = FriendlyNames.GetTypeLabel(genericDetection, emptyAnalysis);

    // Assert
    Assert.Equal(expected, actualLabel);
}
}
42 changes: 32 additions & 10 deletions FileInspectorX/Detection/FileInspector.DeclaredComparison.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,36 @@ public static DeclaredTypeComparison CompareDeclaredDetailed(
DetectedReason = detected?.Reason
};

var baseCmp = CompareDeclared(declaredExtension, detected);
cmp.Mismatch = baseCmp.Mismatch;
cmp.Reason = baseCmp.Reason ?? string.Empty;

if (detected == null || string.IsNullOrEmpty(decl))
return cmp;

var strong = GetStrongAlternatives(detected, detExt);
var baseCmp = CompareDeclared(declaredExtension, detected);
cmp.Mismatch = baseCmp.Mismatch;
cmp.Reason = baseCmp.Reason ?? string.Empty;

if (detected == null || string.IsNullOrEmpty(decl))
return cmp;

var detection = detected;

if (!string.IsNullOrEmpty(decl) &&
!string.IsNullOrEmpty(detGuess) &&
!string.Equals(detExt, detGuess, StringComparison.OrdinalIgnoreCase))
{
var guessOnlyCmp = CompareDeclared(
decl,
new ContentTypeDetectionResult
{
Extension = detGuess ?? string.Empty,
MimeType = detected?.MimeType ?? string.Empty,
Confidence = detected?.Confidence ?? string.Empty,
Reason = detected?.Reason ?? string.Empty
});

if (!guessOnlyCmp.Mismatch)
{
cmp.DetectedExtension = detGuess;
}
}

var strong = GetStrongAlternatives(detection, detExt);
if (strong.Count > 0)
{
cmp.StrongAlternatives = strong;
Expand Down Expand Up @@ -62,8 +84,8 @@ bool IsDangerous(string? ext)
cmp.StrongDangerousAlternativeExtensions = dangerousAlt;

cmp.IsDeclaredDangerous = !string.IsNullOrEmpty(decl) && IsDangerous(decl);
bool detectedDanger = detected.IsDangerous ||
(!string.IsNullOrEmpty(detExt) && IsDangerous(detExt));
bool detectedDanger = detection.IsDangerous ||
(!string.IsNullOrEmpty(detExt) && IsDangerous(detExt));
if (dangerousAlt.Count > 0) detectedDanger = true;
cmp.IsDetectedDangerous = detectedDanger;

Expand Down
60 changes: 39 additions & 21 deletions FileInspectorX/FileInspector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,13 @@ public static (bool Mismatch, string Reason) CompareDeclared(string? declaredExt
det = detGuess!;
detLabel = detGuess! + "(guess)";
}
else
{
return (false, "no-detection-or-declared");
}
// Treat common synonyms as equivalent (avoid false mismatches)
static bool Equivalent(string a, string b) {
else
{
return (false, "no-detection-or-declared");
}

// Treat common synonyms as equivalent (avoid false mismatches)
static bool Equivalent(string a, string b) {
if (string.Equals(a, b, StringComparison.OrdinalIgnoreCase)) return true;
// .cer <-> .crt
if ((a.Equals("cer", StringComparison.OrdinalIgnoreCase) && b.Equals("crt", StringComparison.OrdinalIgnoreCase)) ||
Expand Down Expand Up @@ -122,12 +122,21 @@ static bool Equivalent(string a, string b) {
if ((a.Equals("msi", StringComparison.OrdinalIgnoreCase) && b.Equals("ole2", StringComparison.OrdinalIgnoreCase)) ||
(a.Equals("ole2", StringComparison.OrdinalIgnoreCase) && b.Equals("msi", StringComparison.OrdinalIgnoreCase))) return true;
// Plain‑text family: treat generic text and note/config/log formats as equivalent
if (InPlainTextFamily(a) && InPlainTextFamily(b)) return true;
return false;
}

static bool InPlainTextFamily(string ext)
{
if (InPlainTextFamily(a) && InPlainTextFamily(b)) return true;
return false;
}

if (!string.IsNullOrEmpty(detGuess) &&
!Equivalent(decl, det) &&
Equivalent(decl, detGuess!))
{
det = detGuess!;
// Keep the detected extension as the base ZIP family while still showing that the guess resolved the declared subtype.
detLabel = detGuess! + " (guess)";
}

static bool InPlainTextFamily(string ext)
{
// Conservative set: generic text and common note/config/log formats; excludes csv/tsv/scripts
switch ((ext ?? string.Empty).ToLowerInvariant())
{
Expand Down Expand Up @@ -401,14 +410,23 @@ private static FileStream OpenReadShared(string path)
var refined = TryRefineZipOOxml(stream);
if (refined != null) return Finish(Enrich(refined, src, stream, options));
var confZip = sig.Prefix != null && sig.Prefix.Length >= 4 ? "High" : (sig.Prefix != null && sig.Prefix.Length == 3 ? "Medium" : "Low");
var guess = TryGuessZipSubtype(stream, out var guessMime);
var basicZip = new ContentTypeDetectionResult {
Extension = sig.Extension,
MimeType = NormalizeMime(sig.Extension, sig.MimeType),
Confidence = confZip,
Reason = $"magic:{sig.Extension}",
GuessedExtension = guess
};
var guess = TryGuessZipSubtype(stream, out var guessMime);
if (string.IsNullOrWhiteSpace(guessMime) &&
!string.IsNullOrWhiteSpace(guess) &&
MimeMaps.TryGetByExtension(guess, out var guessedMime) &&
!string.IsNullOrWhiteSpace(guessedMime))
{
guessMime = guessedMime;
}
var basicZip = new ContentTypeDetectionResult {
Extension = sig.Extension,
MimeType = !string.IsNullOrWhiteSpace(guessMime)
? guessMime!
: NormalizeMime(sig.Extension, sig.MimeType),
Confidence = confZip,
Reason = $"magic:{sig.Extension}",
GuessedExtension = guess
};
return Finish(Enrich(basicZip, src, stream, options));
}
if (sig.Extension == "ole2" && stream is not null) {
Expand Down
52 changes: 51 additions & 1 deletion FileInspectorX/Humanization/FriendlyNames.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,47 @@ public static class FriendlyNames
var mime = det?.MimeType?.ToLowerInvariant();
if (string.IsNullOrEmpty(ext) && string.IsNullOrEmpty(mime)) return null;

var subtype = a.ContainerSubtype?.ToLowerInvariant();
if (string.IsNullOrEmpty(subtype) && ext == "zip")
{
subtype = det?.GuessedExtension?.ToLowerInvariant();
}

switch (subtype)
{
case "jar": return "Java archive (JAR)";
case "apk": return "Android package (APK)";
case "ipa": return "iOS application archive (IPA)";
case "epub": return "EPUB e-book";
case "odt": return "OpenDocument text document";
case "ods": return "OpenDocument spreadsheet";
case "odp": return "OpenDocument presentation";
case "odg": return "OpenDocument drawing";
case "kmz": return "Google Earth KMZ archive";
case "vsix": return "Visual Studio extension (VSIX)";
case "nupkg": return "NuGet package (NUPKG)";
case "xap": return "Silverlight application package (XAP)";
case "appx":
case "msix": return "Windows app package";
}

// Extension-first friendly map
switch (ext)
{
case "png": return "PNG image";
case "jpg":
case "jpeg": return "JPEG image";
case "gif": return "GIF image";
case "bmp": return "Bitmap image";
case "webp": return "WebP image";
case "tif":
case "tiff": return "TIFF image";
case "ico": return "Icon image";
case "pfx":
case "p12": return "PKCS#12 / PFX archive";
case "p7b": return "PKCS#7 certificate bundle";
case "spc": return "Software publisher certificate bundle";
case "p7s": return "PKCS#7 signature";
case "crt":
case "cer": return "X.509 certificate";
case "csr": return "Certificate signing request (CSR)";
Expand Down Expand Up @@ -90,9 +126,23 @@ public static class FriendlyNames
case "gz": return "GZIP compressed file";
case "cab": return "Windows cabinet archive";
case "nupkg": return "NuGet package (NUPKG)";
case "xap": return "Silverlight application package (XAP)";
case "exe": return "Windows executable (.exe)";
case "dll": return "Windows library (.dll)";
case "parquet": return "Apache Parquet data file";
case "pcap": return "Packet capture (PCAP)";
case "pcapng": return "Packet capture (PCAPNG)";
case "wasm": return "WebAssembly module";
case "heic": return "HEIC image";
case "flac": return "FLAC audio";
case "wav": return "WAV audio";
case "mp3": return "MP3 audio";
case "m4a": return "AAC audio (M4A)";
case "mp4": return "MPEG-4 video";
case "avi": return "AVI video";
case "3gp": return "3GPP media";
case "bz2": return "BZip2 compressed file";
case "xz": return "XZ compressed file";
case "zst": return "Zstandard compressed file";
case "dmp":
if (string.Equals(mime, "application/x-ms-protected-dump", System.StringComparison.OrdinalIgnoreCase))
return "Protected Windows crash dump";
Expand Down
Loading