|
| 1 | +using Pkg |
| 2 | +Pkg.instantiate() |
| 3 | + |
| 4 | +using JSON |
| 5 | +import JuliaSyntax |
| 6 | + |
"""
    Cell

Abstract supertype of every notebook cell kind defined in this script.
"""
abstract type Cell end
"""
    JuliaCodeCell(code::String)

A notebook cell holding Julia source code in its `code` field.
"""
struct JuliaCodeCell <: Cell
    code::String
end
"""
    JSON.lower(cell::JuliaCodeCell)

Describe `cell` as a dictionary tagged `"cell_type" => "code"`, carrying the cell's code as
`"source"` together with empty metadata, no outputs and no execution count.
"""
function JSON.lower(cell::JuliaCodeCell)
    lowered = Dict(
        "cell_type" => "code",
        "source" => cell.code,
        "metadata" => Dict(),
        "outputs" => Any[],
        "execution_count" => nothing,
    )
    return lowered
end
"""
    MarkdownCell(content::String)

A notebook cell holding Markdown text in its `content` field.
"""
struct MarkdownCell <: Cell
    content::String
end
"""
    JSON.lower(cell::MarkdownCell)

Describe `cell` as a dictionary tagged `"cell_type" => "markdown"`, carrying the cell's
content as `"source"` together with empty metadata.
"""
function JSON.lower(cell::MarkdownCell)
    lowered = Dict(
        "cell_type" => "markdown",
        "source" => cell.content,
        "metadata" => Dict(),
    )
    return lowered
end
| 30 | + |
"""
    Notebook(cells::Vector{Cell})

An ordered collection of cells that together make up one notebook.
"""
struct Notebook
    cells::Vector{Cell}
end
"""
    JSON.lower(nb::Notebook)

Describe `nb` as a dictionary with the top-level keys of a Jupyter notebook: the lowered
cells, kernel / language metadata for Julia, and the notebook format version (4.5).

Every lowered cell is given an `"id"` of the form `"cell-<index>"` unless the cell's own
lowering already provides one, because format 4.5 makes the `id` field mandatory.
"""
function JSON.lower(nb::Notebook)
    # Ids only have to be unique within the notebook, so the 1-based position of the cell
    # is enough and keeps the output reproducible between runs. `merge` lets a value
    # supplied by the cell itself take precedence over the generated one.
    cells = [
        merge(Dict{String,Any}("id" => "cell-$i"), JSON.lower(cell)) for
        (i, cell) in enumerate(nb.cells)
    ]
    return Dict(
        "cells" => cells,
        "metadata" => Dict(
            "kernelspec" => Dict(
                "display_name" => "Julia",
                "language" => "julia",
                "name" => "julia",
            ),
            "language_info" => Dict(
                "file_extension" => ".jl",
                "mimetype" => "application/julia",
                "name" => "julia",
            ),
        ),
        "nbformat" => 4,
        "nbformat_minor" => 5,
    )
end
| 53 | + |
"""
    fix_callouts(md_content::AbstractString)::String

Convert Quarto callouts in `md_content` to blockquotes.

A callout starts on a line such as `::: {.callout-note}` (optionally carrying further
attributes, e.g. `::: {.callout-tip title="Hint"}`) and ends on the next line that consists
only of `:::`. Both fence lines are dropped and every line in between is prefixed with
`"> "`. Lines outside callouts are returned unchanged. A callout that is never closed
extends to the end of the input.
"""
function fix_callouts(md_content::AbstractString)::String
    # There isn't a good Jupyter equivalent of callouts, so we just use blockquotes.
    # https://github.com/quarto-dev/quarto-cli/issues/1167
    # `[^}]*` accepts attributes after the callout type (title="...", collapse="true", ...).
    # Without it such fences are not recognised and are left in the output verbatim.
    callout_regex = r"^:::\s*\{\.callout-\w+[^}]*\}.*$"
    callout_end_regex = r"^:::\s*$"
    new_lines = String[]
    in_callout = false
    for line in split(md_content, '\n')
        if in_callout
            if occursin(callout_end_regex, line)
                in_callout = false
            else
                push!(new_lines, "> " * line)
            end
        elseif occursin(callout_regex, line)
            in_callout = true
        else
            push!(new_lines, line)
        end
    end
    return join(new_lines, '\n')
end
| 84 | + |
"""
    parse_cells(qmd_path::String)::Notebook

Parse the `.qmd` file at `qmd_path` and return a `Notebook` of its cells.

The YAML front matter is removed. Text between executable blocks becomes `MarkdownCell`s
(with callouts rewritten by `fix_callouts`), `{julia}` blocks become `JuliaCodeCell`s, and
executable blocks in any other language are kept as fenced Markdown code blocks.

If any packages are imported by the Julia cells (as reported by `extract_imports`), a code
cell that installs them into a temporary environment is prepended to the notebook.
"""
function parse_cells(qmd_path::String)::Notebook
    content = read(qmd_path, String)

    # Remove YAML front matter.
    yaml_front_matter_regex = r"^---\n(.*?)\n---\n"s
    content = strip(replace(content, yaml_front_matter_regex => ""))

    packages = Set{Symbol}()
    # Executable blocks look like ```{lang} ... ```
    executable_content_regex = r"```\{(\w+)\}(.*?)```"s
    # The text between executable blocks: these are Markdown cells.
    markdown_cell_contents = split(content, executable_content_regex; keepempty=true)
    # The executable blocks themselves: these are code cells.
    code_cell_contents = collect(eachmatch(executable_content_regex, content))
    # Because we set `keepempty=true`, `markdown_cell_contents` always has one more element
    # than `code_cell_contents`. Interleaving them reconstructs the document structure.
    cells = Cell[]
    for (i, md_content) in enumerate(markdown_cell_contents)
        md_content = strip(md_content)
        if !isempty(md_content)
            push!(cells, MarkdownCell(fix_callouts(md_content)))
        end
        # The last Markdown chunk has no code block following it.
        i <= length(code_cell_contents) || continue
        code_match = code_cell_contents[i]
        lang = code_match.captures[1]
        code = strip(code_match.captures[2])
        if lang == "julia"
            cell = JuliaCodeCell(code)
            push!(cells, cell)
            union!(packages, extract_imports(cell))
        else
            # There are some code cells that are not Julia, for example
            # dot and mermaid. You can see what cells there are with
            #     git grep -E '```\{.+\}' | grep -v julia
            # For these cells we'll just convert to Markdown.
            push!(cells, MarkdownCell("```$lang\n$code\n```"))
        end
    end

    # Prepend a cell to install the necessary packages. With nothing to install the cell is
    # omitted: `Pkg.add([])` would fail when the notebook is run.
    if !isempty(packages)
        # Sort so that the generated notebook does not depend on `Set` iteration order.
        package_names = sort!([string(pkg) for pkg in packages])
        imports_as_string = join(("\"" * name * "\"" for name in package_names), ", ")
        install_cell = JuliaCodeCell(
            "# Install necessary dependencies.\nusing Pkg\nPkg.activate(; temp=true)\nPkg.add([$imports_as_string])",
        )
        pushfirst!(cells, install_cell)
    end

    # And we're done!
    return Notebook(cells)
end
| 140 | + |
"""
    extract_imports(cell::JuliaCodeCell)::Set{Symbol}

Extract all packages that are imported inside `cell`.

Only top-level `using` / `import` statements are inspected. For each imported module path
the first component is reported, so `using Foo.Bar`, `using Foo: bar` and `import Foo as F`
all yield `:Foo`. Relative imports (`using .Foo`) and the built-in modules `Base`, `Core`
and `Main` are skipped because they cannot be installed as packages.
"""
function extract_imports(cell::JuliaCodeCell)::Set{Symbol}
    toplevel_expr = JuliaSyntax.parseall(Expr, cell.code)
    imports = Set{Symbol}()
    for expr in toplevel_expr.args
        expr isa Expr && expr.head in (:using, :import) || continue
        for arg in expr.args
            arg isa Expr || continue
            root = _import_root(arg)
            # `:.` is the first component of a relative path such as `.Foo` or `..Foo`.
            if root isa Symbol && root !== :. && !(root in (:Base, :Core, :Main))
                push!(imports, root)
            end
        end
    end
    return imports
end

# Return the first component of the module path named by one argument of a `using` /
# `import` expression, or `nothing` if the argument has an unexpected shape.
#   Foo.Bar      => Expr(:., :Foo, :Bar)
#   Foo: a, b    => Expr(:(:), Expr(:., :Foo), ...)
#   Foo as F     => Expr(:as, Expr(:., :Foo), :F)
_import_root(::Any) = nothing
_import_root(name::Symbol) = name
function _import_root(ex::Expr)
    if ex.head == :. || ex.head == :(:) || ex.head == :as
        return _import_root(ex.args[1])
    end
    return nothing
end
| 172 | + |
"""
    convert_qmd_to_ipynb(in_qmd_path::String, out_ipynb_path::String)

Parse the Quarto document at `in_qmd_path` with `parse_cells` and hand the resulting
notebook to `JSON.json` together with `out_ipynb_path` (pretty-printing enabled). Progress
is logged before and after the conversion.
"""
function convert_qmd_to_ipynb(in_qmd_path::String, out_ipynb_path::String)
    @info "converting $in_qmd_path to $out_ipynb_path..."
    JSON.json(out_ipynb_path, parse_cells(in_qmd_path); pretty=true)
    @info " - done."
end
| 179 | + |
"""
    add_ipynb_link_to_html(html_path::String, ipynb_path::String)

Insert "Download notebook" and "Open in Colab" links into the HTML page at `html_path`.

The links are appended to the list inside the line starting with
`<div class="toc-actions">`. The download link points at the file name of `ipynb_path`
(which sits next to the HTML page) and suggests `<parent directory>.ipynb` as the name to
save under. The Colab link is built from `ipynb_path` with a leading `_site/` removed and
the `PATH_PREFIX` environment variable (empty if unset) prepended.

Nothing is changed if `html_path` does not exist or already contains a Colab link.
"""
function add_ipynb_link_to_html(html_path::String, ipynb_path::String)
    # The notebook is written next to the HTML page, so a relative link to its file name
    # is enough. It is not necessarily `index.ipynb`: it is named after the .qmd file.
    notebook_filename = basename(ipynb_path)
    # This would look like "getting-started.ipynb" and is used when downloading a notebook.
    # Fall back to the notebook's own name when the path has no parent directory, which
    # would otherwise suggest the useless name ".ipynb".
    parent_dir = basename(dirname(ipynb_path))
    suggested_filename = isempty(parent_dir) ? notebook_filename : parent_dir * ".ipynb"
    # The Colab URL needs to look like
    # https://colab.research.google.com/github/TuringLang/docs/blob/gh-pages/path/to/notebook.ipynb
    # Because ipynb_path has `_site/` prefix, we need to strip that off.
    ipynb_path_no_site = replace(ipynb_path, r"^_site/" => "")
    path_prefix = get(ENV, "PATH_PREFIX", "")
    colab_url = "https://colab.research.google.com/github/TuringLang/docs/blob/gh-pages$path_prefix/$ipynb_path_no_site"
    @info "adding link to ipynb notebook in $html_path... with PATH_PREFIX='$path_prefix'"
    if !isfile(html_path)
        @info " - HTML file $html_path does not exist; skipping"
        return
    end
    html_content = read(html_path, String)
    if occursin("colab.research.google.com", html_content)
        @info " - colab link already present; skipping"
        return
    end
    # The line to edit looks like this:
    # <div class="toc-actions"><ul><li><a href="https://github.com/TuringLang/docs/edit/main/getting-started/index.qmd" target="_blank" class="toc-action"><i class="bi bi-github"></i>Edit this page</a></li><li><a href="https://github.com/TuringLang/docs/issues/new" target="_blank" class="toc-action"><i class="bi empty"></i>Report an issue</a></li></ul></div></nav>
    # We want to insert two new list items at the end of the ul.
    insertion = (
        "<li><a href=\"$notebook_filename\" target=\"_blank\" class=\"toc-action\" download=\"$suggested_filename\"><i class=\"bi bi-journal-code\"></i>Download notebook</a></li>" *
        "<li><a href=\"$colab_url\" target=\"_blank\" class=\"toc-action\"><i class=\"bi bi-google\"></i>Open in Colab</a></li>"
    )
    new_lines = map(split(html_content, '\n')) do line
        if occursin(r"^<div class=\"toc-actions\">", line)
            return replace(line, r"</ul>" => "$insertion</ul>")
        else
            return line
        end
    end
    write(html_path, join(new_lines, '\n'))
    @info " - done."
end
| 218 | + |
"""
    main(args)

Entry point of the script.

- With no arguments, every `.qmd` file listed in `_quarto.yml` (lines of the form
  `- path/to/file.qmd`) is converted to a notebook under `_site/`, and a link to the
  notebook is added to the corresponding HTML page. Must be run from the repo root.
- With two arguments, `args[1]` is the input `.qmd` path and `args[2]` the output `.ipynb`
  path. The HTML page to edit is the output path with `.ipynb` replaced by `.html`.

Any other number of arguments is an error.
"""
function main(args)
    if isempty(args)
        # Get the list of .qmd files from the _quarto.yml file. This conveniently also
        # checks that we are at the repo root.
        quarto_config = try
            read("_quarto.yml", String)
        catch e
            # Anything other than a failure to open/read the file is unexpected.
            e isa SystemError || rethrow()
            error("Could not find _quarto.yml; please run this script from the repo root.")
        end
        qmd_files = String[]
        for line in split(quarto_config, '\n')
            m = match(r"^\s*-\s*(.+\.qmd)\s*$", line)
            m === nothing || push!(qmd_files, m.captures[1])
        end
        for file in qmd_files
            # Convert qmd to ipynb
            dir = "_site/" * dirname(file)
            ipynb_base = replace(basename(file), r"\.qmd$" => ".ipynb")
            mkpath(dir)  # essentially mkdir -p; a no-op if the directory already exists
            out_ipynb_path = joinpath(dir, ipynb_base)
            convert_qmd_to_ipynb(file, out_ipynb_path)
            # Add a link in the corresponding html file
            html_base = replace(basename(file), r"\.qmd$" => ".html")
            out_html_path = joinpath(dir, html_base)
            add_ipynb_link_to_html(out_html_path, out_ipynb_path)
        end
    elseif length(args) == 2
        in_qmd_path, out_ipynb_path = args
        convert_qmd_to_ipynb(in_qmd_path, out_ipynb_path)
        out_html_path = replace(out_ipynb_path, r"\.ipynb$" => ".html")
        add_ipynb_link_to_html(out_html_path, out_ipynb_path)
    else
        # Previously any other argument count silently did nothing.
        error(
            "expected either no arguments or exactly two (input .qmd path, output " *
            ".ipynb path), got $(length(args))",
        )
    end
    return nothing
end
@main
0 commit comments