This guide covers common integration patterns for CLI tools and file-processing
programs. All examples require syto as a library dependency and OCaml 4.14+.
The simplest use: take the output of Sys.readdir and filter by pattern.
(* [ml_files dir] lists the entries of [dir] and keeps those whose name
   matches the glob given below. No flags are passed to the matcher. *)
let ml_files dir =
  let entries = Array.to_list (Sys.readdir dir) in
  Syto.filter ~pattern:"*.ml" entries

Add PATHNAME to prevent *.ml from matching paths with slashes.
Without it, "lib/foo.ml" matches *.ml because * matches /.
(* [top_level_ml dir] lists the entries of [dir] and keeps those matching
   the glob below with `PATHNAME set.
   NOTE(review): Sys.readdir returns bare entry names, so these inputs
   contain no slash; `PATHNAME only changes the result for names that
   carry directory components. Confirm the intent of this example. *)
let top_level_ml dir =
  let entries = Array.to_list (Sys.readdir dir) in
  Syto.filter ~flags:[`PATHNAME] ~pattern:"*.ml" entries
(* "lib/foo.ml" does not match; "foo.ml" does *)

Without PERIOD, wildcards match names beginning with a dot (.).
With PERIOD, a leading dot requires an explicit . in the pattern.
(* All files except dotfiles: with `PERIOD set, the wildcard does not
   match a leading dot. *)
let visible_files dir =
  let entries = Array.to_list (Sys.readdir dir) in
  Syto.filter ~flags:[`PERIOD] ~pattern:"*" entries
(* Only dotfiles: the pattern spells out the leading dot explicitly. *)
let hidden_files dir =
  let entries = Array.to_list (Sys.readdir dir) in
  Syto.filter ~flags:[`PERIOD] ~pattern:".*" entries

With PATHNAME also set, the same rule applies after each /:
let flags = [`PATHNAME; `PERIOD]

(* src/*.ml matches src/foo.ml but not src/.hidden.ml *)
let () =
  let matched =
    Syto.filter ~flags ~pattern:"src/*.ml"
      ["src/foo.ml"; "src/.hidden.ml"; "lib/bar.ml"]
  in
  assert (matched = ["src/foo.ml"])

GLOBSTAR requires relative paths that include directory separators.
Build a recursive walker, then pass the flat list to Syto.filter.
(* [collect root prefix] walks the directory [root]/[prefix] recursively and
   returns the non-directory entries as paths relative to [root], always
   joined with "/" so they can be fed to glob patterns:
   ["src/foo.ml"; "src/lib/bar.ml"; ...]
   Pass [""] as [prefix] to start at [root] itself.

   Entries that cannot be inspected (for example a dangling symlink, or a
   file removed between the listing and the check) are reported as plain
   leaves instead of aborting the whole walk.

   @raise Sys_error if a directory itself cannot be listed.

   NOTE(review): Sys.is_directory follows symbolic links, so symlinked
   directories are descended into; confirm that is acceptable for trees
   that may contain link cycles. *)
let rec collect root prefix =
  let dir = if prefix = "" then root else Filename.concat root prefix in
  Sys.readdir dir
  |> Array.to_list
  |> List.concat_map (fun entry ->
       (* Relative paths use "/" regardless of platform: patterns do too. *)
       let rel = if prefix = "" then entry else prefix ^ "/" ^ entry in
       match Sys.is_directory (Filename.concat root rel) with
       | true -> collect root rel
       | false -> [rel]
       (* Sys.is_directory raises when the entry does not resolve. *)
       | exception Sys_error _ -> [rel])
(* Find all OCaml sources under the project root: gather every relative
   path below [root], then keep the ones matching the recursive glob. *)
let find_ml root =
  let paths = collect root "" in
  Syto.filter ~flags:[`PATHNAME; `GLOBSTAR] ~pattern:"**/*.ml" paths
(* Same but exclude _build/: paths matching the _build rule are dropped
   first, then the remaining ones are filtered by the recursive glob. *)
let find_sources root =
  let glob_flags = [`PATHNAME; `GLOBSTAR] in
  let in_build path =
    Syto.match_pattern ~flags:glob_flags ~pattern:"_build/**" ~name:path
  in
  let kept = List.filter (fun path -> not (in_build path)) (collect root "") in
  Syto.filter ~flags:glob_flags ~pattern:"**/*.ml" kept

Key GLOBSTAR behaviours:
- `**/*.ml` matches `foo.ml` (zero components) and `a/b/foo.ml` (two components).
- `a/**` matches `a/` and `a/b/c` but not `a` — trailing `**` requires at least one path boundary.
- `**/.*` with `PERIOD` finds any dotfile at any depth while keeping the explicit-dot requirement.
A .gitignore-style filter: exclude any path that matches at least one rule.
(* Flag set used when matching ignore rules: `PATHNAME, `PERIOD and
   `GLOBSTAR together. *)
let ignore_flags = [`PATHNAME; `PERIOD; `GLOBSTAR]
(* [is_ignored rules name] is [true] when at least one pattern in [rules]
   matches [name] under [ignore_flags]. *)
let is_ignored rules name =
  let matches rule =
    Syto.match_pattern ~flags:ignore_flags ~pattern:rule ~name
  in
  List.exists matches rules
(* [apply_ignore rules names] keeps, in order, the names that no rule
   ignores. *)
let apply_ignore rules names =
  let keep name = not (is_ignored rules name) in
  List.filter keep names
(* Default ignore rules, meant to be matched with [ignore_flags].
   Because `PATHNAME stops a single star from crossing a slash, a bare
   "*.byte" would only catch files at the root of the tree; the build
   artefact rules are therefore anchored with a leading globstar so that
   they apply at any depth (including the root).
   NOTE(review): whether a trailing globstar also covers dot-prefixed
   entries under `PERIOD (for example inside ".git/") is not shown here;
   confirm against the library documentation. *)
let default_rules = [
  "_build/**";
  "**/*.byte";
  "**/*.native";
  ".git/**";
  "**/.DS_Store";
  "**/.*~";
]
(* Print every path under the current directory that survives the default
   ignore rules, one per line. *)
let () =
  collect "." ""
  |> apply_ignore default_rules
  |> List.iter print_endline

Syto.filter parses the pattern once and matches all names against it.
is_ignored above re-parses each rule on every call to match_pattern, that is,
once per (rule, name) pair. For a small fixed rule set checked against a large
file list, the overhead is negligible — rule count is bounded. If you have many
rules and want each rule parsed only once, run filter per rule and track exclusions:
(* [apply_ignore_fast rules names] gives the same kind of result as
   filtering out ignored names one by one, but calls Syto.filter once per
   rule and records every matched name in a hash table; the final pass
   keeps the names that were never recorded, in their original order. *)
let apply_ignore_fast rules names =
  let hits = Hashtbl.create 64 in
  let mark name = Hashtbl.replace hits name () in
  let run_rule rule =
    List.iter mark (Syto.filter ~flags:ignore_flags ~pattern:rule names)
  in
  List.iter run_rule rules;
  List.filter (fun name -> not (Hashtbl.mem hits name)) names

CASEFOLD folds ASCII A–Z to a–z before comparison — on both the pattern
side and the name side.
(* Match image files regardless of extension case: [is_image name] is
   [true] when [name] matches any of the image globs under `CASEFOLD. *)
let is_image name =
  let image_globs = ["*.jpg"; "*.jpeg"; "*.png"; "*.gif"; "*.webp"] in
  let hit glob = Syto.match_pattern ~flags:[`CASEFOLD] ~pattern:glob ~name in
  List.exists hit image_globs
(* Sanity check: only the three image names survive, in input order. *)
let () =
  let expected = ["photo.JPG"; "image.png"; "icon.GIF"] in
  let actual =
    List.filter is_image ["photo.JPG"; "image.png"; "doc.PDF"; "icon.GIF"]
  in
  assert (actual = expected)

CASEFOLD combines cleanly with PATHNAME:
(* Case-insensitive match within a specific directory *)
Syto.match_pattern
~flags:[`PATHNAME; `CASEFOLD]
~pattern:"SRC/*.ML"
~name:"src/foo.ml";;
(* - : bool = true *)

CASEFOLD is ASCII-only. Ukrainian А (U+0410) and а (U+0430) are
distinct codepoints and remain distinct. If you need case-insensitive
matching for a specific non-ASCII script, normalize both strings before
calling syto.
syto decodes strings as UTF-8. ? matches one Unicode codepoint, which may
span 1–4 bytes. Character ranges compare by codepoint value.
(* Ukrainian word яблуко: 6 codepoints, 12 UTF-8 bytes *)
Syto.match_pattern ~pattern:"??????" ~name:"яблуко";;
(* - : bool = true — one ? per codepoint *)
Syto.match_pattern ~pattern:"????????????" ~name:"яблуко";;
(* - : bool = false — 12 bytes is not 12 codepoints *)
(* Cyrillic lowercase range а–я (U+0430–U+044F) *)
Syto.match_pattern ~pattern:"[а-я]*" ~name:"яблуко";;
(* - : bool = true *)
Syto.match_pattern ~pattern:"[а-я]*" ~name:"apple";;
(* - : bool = false *)

Ukrainian filenames use two different apostrophe characters in practice:
' (U+0027, ASCII APOSTROPHE) and ’ (U+2019, RIGHT SINGLE QUOTATION MARK). They
are distinct codepoints. Use * or a bracket expression such as ['’]
(written "['\u{2019}]" in OCaml source) when you need to match either:
(* Matches м'яч with either apostrophe variant *)
Syto.match_pattern ~pattern:"м*яч" ~name:"м'яч";;
(* - : bool = true *)

match_pattern and filter raise Syto.Error on invalid patterns.
Errors from user-supplied patterns should be caught:
(* [safe_filter ?flags ~pattern names] runs Syto.filter and turns a pattern
   error into a value instead of an exception.

   - [flags] is forwarded unchanged to Syto.filter; when omitted, the
     call is the same as one without flags.
   - Returns [Ok matches] on success.
   - Returns [Error msg] when Syto.Error is raised, where [msg] quotes the
     offending pattern, the byte offset reported by the library and a short
     human-readable reason.

   Only Syto.Error is intercepted; any other exception propagates. *)
let safe_filter ?flags ~pattern names =
  match Syto.filter ?flags ~pattern names with
  | result -> Ok result
  | exception Syto.Error { kind; pattern; offset } ->
    (* [pattern] here is the one carried by the exception payload. *)
    let reason =
      match kind with
      | Syto.Unterminated_bracket -> "unterminated bracket expression"
      | Syto.Invalid_range -> "invalid range in bracket expression"
      | Syto.Unknown_class s -> "unknown character class [:" ^ s ^ ":]"
      | Syto.Trailing_escape -> "pattern ends with backslash"
      | Syto.Unsupported_bracket_syntax ->
        "collating or equivalence class not supported"
    in
    Error (Printf.sprintf "bad pattern %S (byte %d): %s" pattern offset reason)

offset is a byte offset into pattern, consistent with String.sub.
Errors are never raised because of the name string — invalid UTF-8 in a
name is handled silently (each invalid byte is treated as a single unit).
| Use case | Flags |
|---|---|
| Shell glob, exclude dotfiles | [`PATHNAME; `PERIOD] |
| gitignore-style rules | [`PATHNAME; `PERIOD; `GLOBSTAR] |
| Recursive source discovery | [`PATHNAME; `GLOBSTAR] |
| Case-insensitive file search | [`CASEFOLD] |
| Case-insensitive with path boundaries | [`PATHNAME; `CASEFOLD] |
| Match everything including dotfiles | [] |
| Literal pattern, no escape processing | [`NOESCAPE] |