From ad6542063362b5fc2b2f8c8569ecf01589762c09 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Fri, 12 May 2023 17:19:17 -0600 Subject: [PATCH 01/14] Clean up indexpackage.jl, export as a function, and add some profiling --- registryindexer/main.jl | 10 ++-- src/indexpackage.jl | 113 ++++++++++++++++++++++------------------ 2 files changed, 67 insertions(+), 56 deletions(-) diff --git a/registryindexer/main.jl b/registryindexer/main.jl index c051a1fa..41674695 100644 --- a/registryindexer/main.jl +++ b/registryindexer/main.jl @@ -27,7 +27,7 @@ function get_all_package_versions(;max_versions=typemax(Int)) }) |> @mutate( versions = (Pkg.TOML.parsefile(joinpath(registry_folder_path, _.path, "Versions.toml")) |> - @map(i->{version=VersionNumber(i[1]), treehash=i[2]["git-tree-sha1"]}) |> + @map(i->{version=VersionNumber(i[1]), treehash=i[2]["git-tree-sha1"]}) |> @orderby_descending(i->i.version) |> @take(max_versions) |> collect) @@ -119,9 +119,9 @@ true || asyncmap(julia_versions) do v # error_filename = "v$(versionwithoutplus)_$(v.treehash).unavailable" # # Write them to a file - # open(joinpath(path, error_filename), "w") do io + # open(joinpath(path, error_filename), "w") do io # end - + # Pkg.PlatformEngines.package(path, cache_path) # end @@ -186,7 +186,7 @@ asyncmap(unindexed_packageversions, ntasks=max_tasks) do v cache_path_compressed = joinpath(cache_path, "v$(versionwithoutplus)_$(v.treehash).tar.gz") mktempdir() do path - res = execute(`docker run --rm --mount type=bind,source="$path",target=/symcache juliavscodesymbolindexer:$(first(julia_versions)) julia SymbolServer/src/indexpackage.jl $(v.name) $(v.version) $(v.uuid) $(v.treehash)`) + res = execute(`docker run --rm --mount type=bind,source="$path",target=/symcache juliavscodesymbolindexer:$(first(julia_versions)) julia SymbolServer/src/indexpackage.jl $(v.name) $(v.version) $(v.uuid) $(v.treehash) /symcache`) if res.code==37 # This is our magic error code that indicates everything worked global 
count_successfully_cached += 1 @@ -209,7 +209,7 @@ asyncmap(unindexed_packageversions, ntasks=max_tasks) do v isfile(joinpath(path, error_filename)) && rm(joinpath(path, error_filename)) # Write them to a file - open(joinpath(path, error_filename), "w") do io + open(joinpath(path, error_filename), "w") do io end open(joinpath(cache_folder, "logs", res.code==10 ? "packageloadfailure" : res.code==20 ? "packageinstallfailure" : "packageindexfailure", "log_$(v.name)_v$(versionwithoutplus)_stdout.txt"), "w") do f diff --git a/src/indexpackage.jl b/src/indexpackage.jl index 421a0bf4..1553180b 100644 --- a/src/indexpackage.jl +++ b/src/indexpackage.jl @@ -3,63 +3,74 @@ module SymbolServer using Pkg, SHA using Base: UUID -current_package_name = Symbol(ARGS[1]) -current_package_version = VersionNumber(ARGS[2]) -current_package_uuid = UUID(ARGS[3]) -current_package_treehash = ARGS[4] - -@info "Indexing package $current_package_name $current_package_version..." - -# This path will always be mounted in the docker container in which we are running -store_path = "/symcache" - -current_package_versionwithoutplus = replace(string(current_package_version), '+'=>'_') -filename_with_extension = "v$(current_package_versionwithoutplus)_$current_package_treehash.jstore" - -module LoadingBay end - -try - Pkg.add(name=string(current_package_name), version=current_package_version) -catch err - @info "Could not install package, exiting" - exit(20) +@time "Initial includes" begin + include("faketypes.jl") + include("symbols.jl") + include("utils.jl") + include("serialize.jl") + using .CacheStore end -# TODO Make the code below ONLY write a cache file for the package we just added here. -include("faketypes.jl") -include("symbols.jl") -include("utils.jl") -include("serialize.jl") -using .CacheStore +module LoadingBay end -# Load package -m = try - LoadingBay.eval(:(import $current_package_name)) - getfield(LoadingBay, current_package_name) -catch e - @info "Could not load package, exiting." 
- exit(10) +function index_package(name, version, uuid, treehash) + @time "Indexing package $name $version..." begin + versionwithoutplus = replace(string(version), '+'=>'_') + filename_with_extension = "v$(versionwithoutplus)_$treehash.jstore" + + # Load package + m = try + @time "Loading $name $version" begin + LoadingBay.eval(:(import $name)) + getfield(LoadingBay, name) + end + catch e + @info "Could not load package $name $version ($uuid): $e" + return 10 + end + + # Get the symbols + env = @time "getenvtree" getenvtree([name]) + @time "symbols" symbols(env, m, get_return_type=true) + + # Strip out paths + @time "modify_dirs" begin + modify_dirs( + env[name], + f -> modify_dir(f, pkg_src_dir(Base.loaded_modules[Base.PkgId(uuid, string(name))]), "PLACEHOLDER") + ) + end + + # The destination path must be where SymbolServer.jl expects it + dir = joinpath( + store_path, + string(uppercase(string(name)[1])), + string(name, "_", uuid), + ) + + mkpath(dir) + + @time "CacheStore.write" begin + open(joinpath(dir, filename_with_extension), "w") do io + CacheStore.write(io, Package(string(name), env[name], uuid, nothing)) + end + end + end + + # Exit with a custom error code to indicate success. This allows + # the parent process to distinguish between a successful run and one + # where the package exited the process. + return 37 end -# Get the symbols -env = getenvtree([current_package_name]) -symbols(env, m, get_return_type=true) - - # Strip out paths -modify_dirs(env[current_package_name], f -> modify_dir(f, pkg_src_dir(Base.loaded_modules[Base.PkgId(current_package_uuid, string(current_package_name))]), "PLACEHOLDER")) - -# There's an issue here - @enum used within CSTParser seems to add a method that is introduced from Enums.jl... 
+if abspath(PROGRAM_FILE) == @__FILE__ + name = Symbol(ARGS[1]) + version = VersionNumber(ARGS[2]) + uuid = UUID(ARGS[3]) + treehash = ARGS[4] + store_path = ARGS[5] -# Write them to a file -open(joinpath(store_path, filename_with_extension), "w") do io - CacheStore.write(io, Package(string(current_package_name), env[current_package_name], current_package_uuid, nothing)) + exit(index_package(name, version, uuid, treehash)) end -@info "Finished indexing." - -# We are exiting with a custom error code to indicate success. This allows -# the parent process to distinguish between a successful run and one -# where the package exited the process. -exit(37) - end From 7f224f3e9002d0a03eb5f7f0b3ef094ca149263d Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Mon, 15 May 2023 18:00:13 -0600 Subject: [PATCH 02/14] spelling: disc -> disk, see https://grammarist.com/spelling/disc-disk/ --- src/SymbolServer.jl | 12 ++++++------ src/utils.jl | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/SymbolServer.jl b/src/SymbolServer.jl index ffb56a73..caf8ccea 100644 --- a/src/SymbolServer.jl +++ b/src/SymbolServer.jl @@ -41,7 +41,7 @@ function getstore(ssi::SymbolServerInstance, environment_path::AbstractString, p let manifest = read_manifest(manifest_filename) if manifest !== nothing @debug "Downloading cache files for manifest at $(manifest_filename)." 
- to_download = collect(validate_disc_store(ssi.store_path, manifest)) + to_download = collect(validate_disk_store(ssi.store_path, manifest)) batches = Iterators.partition(to_download, max(1, floor(Int, length(to_download)÷50))) for (i, batch) in enumerate(batches) percentage = round(Int, 100*(i - 1)/length(batches)) @@ -138,7 +138,7 @@ function getstore(ssi::SymbolServerInstance, environment_path::AbstractString, p if success(p) # Now we create a new symbol store and load everything into that - # from disc + # from disk new_store = recursive_copy(stdlibs) load_project_packages_into_store!(ssi, environment_path, new_store, progress_callback) @debug "SymbolStore: store success" @@ -201,7 +201,7 @@ end """ load_package_from_cache_into_store!(ssp::SymbolServerInstance, uuid, store) -Tries to load the on-disc stored cache for a package (uuid). Attempts to generate (and save to disc) a new cache if the file does not exist or is unopenable. +Tries to load the on-disk stored cache for a package (uuid). Attempts to generate (and save to disk) a new cache if the file does not exist or is unopenable. """ function load_package_from_cache_into_store!(ssi::SymbolServerInstance, uuid::UUID, environment_path, manifest, store, progress_callback = nothing, percentage = missing) yield() @@ -234,7 +234,7 @@ function load_package_from_cache_into_store!(ssi::SymbolServerInstance, uuid::UU end catch err Base.display_error(stderr, err, catch_backtrace()) - @warn "Tried to load $pe_name but failed to load from disc, re-caching." 
try rm(cache_path) catch err2 @@ -244,12 +244,12 @@ function load_package_from_cache_into_store!(ssi::SymbolServerInstance, uuid::UU end end else - @warn "$(pe_name) not stored on disc" + @warn "$(pe_name) not stored on disk" store[Symbol(pe_name)] = ModuleStore(VarRef(nothing, Symbol(pe_name)), Dict{Symbol,Any}(), "$pe_name failed to load.", true, Symbol[], Symbol[]) end end -function clear_disc_store(ssi::SymbolServerInstance) +function clear_disk_store(ssi::SymbolServerInstance) for f in readdir(ssi.store_path) if occursin(f, "ABCDEFGHIJKLMNOPQRSTUVWXYZ") rm(joinpath(ssi.store_path, f), recursive = true) diff --git a/src/utils.jl b/src/utils.jl index 9663c288..4ee5ce7f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -531,11 +531,11 @@ function get_file_from_cloud(manifest, uuid, environment_path, depot_dir, cache_ end """ - validate_disc_store(store_path, manifest) + validate_disk_store(store_path, manifest) -This returns a list of non-jll packages in the manifest that don't have caches on disc. +This returns a list of non-jll packages in the manifest that don't have caches on disk. 
""" -function validate_disc_store(store_path, manifest) +function validate_disk_store(store_path, manifest) filter(manifest) do pkg uuid = packageuuid(pkg) endswith(packagename(manifest, uuid), "_jll") && return false @@ -637,7 +637,7 @@ end function write_cache(uuid, pkg::Package, outpath) mkpath(dirname(outpath)) - @info "Now writing to disc $uuid" + @info "Now writing to disk $uuid" open(outpath, "w") do io CacheStore.write(io, pkg) end From 098bc5a77674ea4ae7a9ff89dbbdfd27ce1ac3a5 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Mon, 15 May 2023 18:00:40 -0600 Subject: [PATCH 03/14] grammar: it's -> its --- src/symbols.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/symbols.jl b/src/symbols.jl index f3544897..bf30d95a 100644 --- a/src/symbols.jl +++ b/src/symbols.jl @@ -472,8 +472,7 @@ function load_core(; get_return_type = false) symbols(cache, get_return_type = get_return_type) cache[:Main] = ModuleStore(VarRef(nothing, :Main), Dict(), "", true, [], []) - # This is wrong. As per the docs the Base.include each module should have it's own - # version. + # This is wrong. As per the docs the Base.include each module should have its own version. push!(cache[:Base].exportednames, :include) # Add special cases for built-ins From ae0822419195b1713c7d3767e3690b36a77d1240 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Wed, 17 May 2023 14:58:05 -0600 Subject: [PATCH 04/14] Whitespace fix --- src/symbols.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/symbols.jl b/src/symbols.jl index bf30d95a..18977fbe 100644 --- a/src/symbols.jl +++ b/src/symbols.jl @@ -177,7 +177,7 @@ function cache_methods(@nospecialize(f), name, env, get_return_type) # Get inferred method return type if get_return_type sparams = Core.svec(sparam_syms(m[3])...) 
- rt = try + rt = try @static if isdefined(Core.Compiler, :NativeInterpreter) Core.Compiler.typeinf_type(Core.Compiler.NativeInterpreter(), m[3], m[3].sig, sparams) else From bbccca6920ac0de16a201d18dba3c10624b94952 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Wed, 17 May 2023 15:00:31 -0600 Subject: [PATCH 05/14] Add PrecompileTools and Manifest.toml --- Manifest.toml | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++ Project.toml | 15 +++--- 2 files changed, 152 insertions(+), 7 deletions(-) create mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 00000000..43f7dfe9 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,144 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.8.5" +manifest_format = "2.0" +project_hash = "e9b501a7a2bfe6bb16747a24c09b28861fd51dd6" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.84.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", 
"MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.0+0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.2.1" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.8.0" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "259e206946c293698122f63e2b513a7c99a244e8" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.1.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "7eb1686b4f04b82f96ed7a4ea5890a4f0c7a09f1" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.0" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.0" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.1" + +[[deps.UUIDs]] +deps = 
["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.12+3" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.48.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" diff --git a/Project.toml b/Project.toml index 320f10dd..854d6a5a 100644 --- a/Project.toml +++ b/Project.toml @@ -3,21 +3,22 @@ uuid = "cf896787-08d5-524d-9de7-132aaa0cb996" version = "7.2.2-DEV" [deps] -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" +Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" -Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] julia = "1" +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + [targets] test = ["Test"] From a831b832db7e580af2f99847e054b4ad53a0faf9 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Wed, 17 May 2023 15:18:40 -0600 Subject: [PATCH 06/14] Rename version -> get_version in utils.jl --- src/utils.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 4ee5ce7f..685c8b41 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -111,7 +111,7 @@ function isinmanifest end 
deps(d::Dict{String,Any}) = get(d, "deps", Dict{String,Any}()) deps(pe::PackageEntry) = get(pe[1], "deps", Dict{String,Any}()) path(pe::PackageEntry) = get(pe[1], "path", nothing) - version(pe::PackageEntry) = get(pe[1], "version", nothing) + get_version(pe::PackageEntry) = get(pe[1], "version", nothing) tree_hash(pe) = get(pe[1], "git-tree-sha1", nothing) frommanifest(c::Pkg.Types.Context, uuid) = frommanifest(manifest(c), uuid) @@ -158,8 +158,8 @@ else deps(proj::Pkg.Types.Project) = proj.deps deps(pkg::Pair{String,UUID}, c::Pkg.Types.Context) = deps(packageuuid(pkg), c) path(pe::PackageEntry) = pe.path - version(pe::PackageEntry) = pe.version - version(pe::Pair{UUID,PackageEntry}) = last(pe).version + get_version(pe::PackageEntry) = pe.version + get_version(pe::Pair{UUID,PackageEntry}) = last(pe).version frommanifest(c::Pkg.Types.Context, uuid) = manifest(c)[uuid] frommanifest(manifest::Dict{UUID,PackageEntry}, uuid) = manifest[uuid] tree_hash(pe::PackageEntry) = VERSION >= v"1.3" ? pe.tree_hash : get(pe.other, "git-tree-sha1", nothing) @@ -652,7 +652,7 @@ Returns a vector containing the cache storage path for a package structured: [fo function get_cache_path(manifest, uuid) name = packagename(manifest, uuid) pkg_info = frommanifest(manifest, uuid) - ver = version(pkg_info) + ver = get_version(pkg_info) ver = ver === nothing ? 
"nothing" : ver ver = replace(string(ver), '+'=>'_') th = tree_hash(pkg_info) From 9f56d5a665a6aeaa531e668a494cdf9ab653ba11 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Wed, 17 May 2023 18:11:02 -0600 Subject: [PATCH 07/14] Clearer indentation in symbols.jl --- src/symbols.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/symbols.jl b/src/symbols.jl index 18977fbe..26bcc3f1 100644 --- a/src/symbols.jl +++ b/src/symbols.jl @@ -400,7 +400,12 @@ function all_names(m, pred, symbols = Set(Symbol[]), seen = Set(Module[])) symbols end -function symbols(env::EnvStore, m::Union{Module,Nothing} = nothing, allnames::Base.IdSet{Symbol} = getallns(), visited = Base.IdSet{Module}(); get_return_type = false) +function symbols( + env::EnvStore, m::Union{Module,Nothing} = nothing, + allnames::Base.IdSet{Symbol} = getallns(), + visited = Base.IdSet{Module}(); + get_return_type = false +) if m isa Module cache = _lookup(VarRef(m), env, true) cache === nothing && return From da26d84ec292323bb0da932f4e701d0aa717ba34 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Wed, 17 May 2023 18:12:15 -0600 Subject: [PATCH 08/14] Move index_package function to SymbolServer.jl --- src/SymbolServer.jl | 46 +++++++++++++++++++++++++++ src/indexpackage.jl | 75 ++++++++------------------------------------- 2 files changed, 58 insertions(+), 63 deletions(-) diff --git a/src/SymbolServer.jl b/src/SymbolServer.jl index caf8ccea..59cb0759 100644 --- a/src/SymbolServer.jl +++ b/src/SymbolServer.jl @@ -12,6 +12,8 @@ include("utils.jl") include("serialize.jl") using .CacheStore +export index_package + mutable struct SymbolServerInstance process::Union{Nothing,Base.Process} depot_path::String @@ -257,6 +259,50 @@ function clear_disk_store(ssi::SymbolServerInstance) end end +function index_package( + name::Symbol, + version::VersionNumber, + uuid::UUID, + treehash::String, + store_path::String, + m::Module +) + @time "Indexing package $name $version..." 
begin + # Get the symbols + env = @time "getenvtree" getenvtree([name]) + @time "symbols" symbols(env, m, get_return_type=true) + + # Strip out paths + @time "modify_dirs" begin + modify_dirs( + env[name], + f -> modify_dir(f, pkg_src_dir(Base.loaded_modules[Base.PkgId(uuid, string(name))]), "PLACEHOLDER") + ) + end + + # The destination path must be where SymbolServer.jl expects it + dir = joinpath( + store_path, + string(uppercase(string(name)[1])), + string(name, "_", uuid), + ) + + mkpath(dir) + + @time "CacheStore.write" begin + filename_with_extension = "v$(replace(string(version), '+'=>'_'))_$treehash.jstore" + open(joinpath(dir, filename_with_extension), "w") do io + CacheStore.write(io, Package(string(name), env[name], uuid, nothing)) + end + end + end + + # Exit with a custom error code to indicate success. This allows + # the parent process to distinguish between a successful run and one + # where the package exited the process. + return 37 +end + const stdlibs = load_core() function _precompile_() diff --git a/src/indexpackage.jl b/src/indexpackage.jl index 1553180b..a6eb3fad 100644 --- a/src/indexpackage.jl +++ b/src/indexpackage.jl @@ -1,68 +1,8 @@ -module SymbolServer -using Pkg, SHA -using Base: UUID - -@time "Initial includes" begin - include("faketypes.jl") - include("symbols.jl") - include("utils.jl") - include("serialize.jl") - using .CacheStore -end +import SymbolServer module LoadingBay end -function index_package(name, version, uuid, treehash) - @time "Indexing package $name $version..." 
begin - versionwithoutplus = replace(string(version), '+'=>'_') - filename_with_extension = "v$(versionwithoutplus)_$treehash.jstore" - - # Load package - m = try - @time "Loading $name $version" begin - LoadingBay.eval(:(import $name)) - getfield(LoadingBay, name) - end - catch e - @info "Could not load package $name $version ($uuid): $e" - return 10 - end - - # Get the symbols - env = @time "getenvtree" getenvtree([name]) - @time "symbols" symbols(env, m, get_return_type=true) - - # Strip out paths - @time "modify_dirs" begin - modify_dirs( - env[name], - f -> modify_dir(f, pkg_src_dir(Base.loaded_modules[Base.PkgId(uuid, string(name))]), "PLACEHOLDER") - ) - end - - # The destination path must be where SymbolServer.jl expects it - dir = joinpath( - store_path, - string(uppercase(string(name)[1])), - string(name, "_", uuid), - ) - - mkpath(dir) - - @time "CacheStore.write" begin - open(joinpath(dir, filename_with_extension), "w") do io - CacheStore.write(io, Package(string(name), env[name], uuid, nothing)) - end - end - end - - # Exit with a custom error code to indicate success. This allows - # the parent process to distinguish between a successful run and one - # where the package exited the process. 
- return 37 -end - if abspath(PROGRAM_FILE) == @__FILE__ name = Symbol(ARGS[1]) version = VersionNumber(ARGS[2]) @@ -70,7 +10,16 @@ if abspath(PROGRAM_FILE) == @__FILE__ treehash = ARGS[4] store_path = ARGS[5] - exit(index_package(name, version, uuid, treehash)) -end + # Load package + m = try + @time "Loading $name $version" begin + LoadingBay.eval(:(import $name)) + getfield(LoadingBay, name) + end + catch e + @info "Could not load package $name $version ($uuid): $e" + return 10 + end + exit(SymbolServer.index_package(name, version, uuid, treehash, store_path, m)) end From 6d0efb4a741b5aa1126e87fea616b097b8c25389 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Wed, 17 May 2023 18:37:19 -0600 Subject: [PATCH 09/14] Extract index_packages from server.jl to SymbolServer.jl --- src/SymbolServer.jl | 108 ++++++++++++++++++++++++++++++++++++++++- src/server.jl | 115 ++------------------------------------------ 2 files changed, 111 insertions(+), 112 deletions(-) diff --git a/src/SymbolServer.jl b/src/SymbolServer.jl index 59cb0759..4fa6533c 100644 --- a/src/SymbolServer.jl +++ b/src/SymbolServer.jl @@ -12,7 +12,7 @@ include("utils.jl") include("serialize.jl") using .CacheStore -export index_package +export index_package, index_packages mutable struct SymbolServerInstance process::Union{Nothing,Base.Process} @@ -303,6 +303,112 @@ function index_package( return 37 end +module LoadingBay end + +# Method to check whether a package is part of the standard library and so +# won't need recaching. +function is_stdlib(uuid::UUID) + if isdefined(Pkg.Types, :is_stdlib) + return Pkg.Types.is_stdlib(uuid) + else + return uuid in keys(ctx.stdlibs) + end +end + +function index_packages(conn, store_path::String) + start_time = time_ns() + + ctx = try + Pkg.Types.Context() + catch err + @info "Package environment can't be read." 
+ exit() + end + + server = Server(store_path, ctx, Dict{UUID,Package}()) + + written_caches = String[] # List of caches that have already been written + toplevel_pkgs = deps(project(ctx)) # First get a list of all package UUIds that we want to cache + packages_to_load = [] + + # Next make sure the cache is up-to-date for all of these + for (pk_name, uuid) in toplevel_pkgs + uuid isa UUID || (uuid = UUID(uuid)) + if !isinmanifest(ctx, uuid) + @info "$pk_name not in manifest, skipping." + continue + end + pe = frommanifest(manifest(ctx), uuid) + cache_path = joinpath(server.storedir, SymbolServer.get_cache_path(manifest(ctx), uuid)...) + + if isfile(cache_path) + if is_package_deved(manifest(ctx), uuid) + try + cached_version = open(cache_path) do io + CacheStore.read(io) + end + if sha_pkg(frommanifest(manifest(ctx), uuid)) != cached_version.sha + @info "Outdated sha, will recache package $pk_name ($uuid)" + push!(packages_to_load, uuid) + else + @info "Package $pk_name ($uuid) is cached." + end + catch err + @info "Couldn't load $pk_name ($uuid) from file, will recache." + end + else + @info "Package $pk_name ($uuid) is cached." + end + else + @info "Will cache package $pk_name ($uuid)" + push!(packages_to_load, uuid) + end + end + + visited = Base.IdSet{Module}([Base, Core]) + + @debug "Packages to load: $packages_to_load" + + # Load all packages together + for (i, uuid) in enumerate(packages_to_load) + load_package(ctx, uuid, conn, LoadingBay, round(Int, 100*(i - 1)/length(packages_to_load))) + + # XXX: The following *may* duplicate some work, but we want to make sure that interrupts of + # the SymbolServer process don't invalidate *all* work done (which would happen when only + # writing the cache files out after all packages are loaded) + + # Create image of whole package env. This creates the module structure only. + env_symbols = getenvtree() + + # Populate the above with symbols, skipping modules that don't need caching. 
+ # symbols (env_symbols) + # don't need to cache these each time... + for (pid, m) in Base.loaded_modules + if pid.uuid !== nothing && + is_stdlib(pid.uuid) && + isinmanifest(ctx, pid.uuid) && + isfile(joinpath(server.storedir, SymbolServer.get_cache_path(manifest(ctx), pid.uuid)...)) + push!(visited, m) + delete!(env_symbols, Symbol(pid.name)) + end + end + + symbols(env_symbols, nothing, getallns(), visited) + + # Wrap the `ModuleStore`s as `Package`s. + for (pkg_name, cache) in env_symbols + !isinmanifest(ctx, String(pkg_name)) && continue + uuid = packageuuid(ctx, String(pkg_name)) + pe = frommanifest(ctx, uuid) + server.depot[uuid] = Package(String(pkg_name), cache, uuid, sha_pkg(pe)) + end + + write_depot(server, server.context, written_caches) + end + + @info "Symbol server indexing took $((time_ns() - start_time) / 1e9) seconds." +end + const stdlibs = load_core() function _precompile_() diff --git a/src/server.jl b/src/server.jl index 0d082922..7a5f4323 100644 --- a/src/server.jl +++ b/src/server.jl @@ -1,13 +1,12 @@ -module SymbolServer + +import Sockets +import SymbolServer !in("@stdlib", LOAD_PATH) && push!(LOAD_PATH, "@stdlib") # Make sure we can load stdlibs -import Sockets pipename = length(ARGS) > 1 ? ARGS[2] : nothing conn = pipename !== nothing ? Sockets.connect(pipename) : nothing -start_time = time_ns() - # Try to lower the priority of this process so that it doesn't block the # user system. @static if Sys.iswindows() @@ -22,115 +21,9 @@ else # We don't check the return value because it doesn't really matter end -module LoadingBay -end - -using Pkg, SHA -using Base: UUID - -include("faketypes.jl") -include("symbols.jl") -include("utils.jl") -include("serialize.jl") -using .CacheStore - store_path = length(ARGS) > 0 ? ARGS[1] : abspath(joinpath(@__DIR__, "..", "store")) -ctx = try - Pkg.Types.Context() -catch err - @info "Package environment can't be read." 
- exit() -end -# Add some methods to check whether a package is part of the standard library and so -# won't need recaching. -if isdefined(Pkg.Types, :is_stdlib) - is_stdlib(uuid::UUID) = Pkg.Types.is_stdlib(uuid) -else - is_stdlib(uuid::UUID) = uuid in keys(ctx.stdlibs) -end - -server = Server(store_path, ctx, Dict{UUID,Package}()) - -written_caches = String[] # List of caches that have already been written -toplevel_pkgs = deps(project(ctx)) # First get a list of all package UUIds that we want to cache -packages_to_load = [] -# Next make sure the cache is up-to-date for all of these -for (pk_name, uuid) in toplevel_pkgs - uuid isa UUID || (uuid = UUID(uuid)) - if !isinmanifest(ctx, uuid) - @info "$pk_name not in manifest, skipping." - continue - end - pe = frommanifest(manifest(ctx), uuid) - cache_path = joinpath(server.storedir, SymbolServer.get_cache_path(manifest(ctx), uuid)...) - - if isfile(cache_path) - if is_package_deved(manifest(ctx), uuid) - try - cached_version = open(cache_path) do io - CacheStore.read(io) - end - if sha_pkg(frommanifest(manifest(ctx), uuid)) != cached_version.sha - @info "Outdated sha, will recache package $pk_name ($uuid)" - push!(packages_to_load, uuid) - else - @info "Package $pk_name ($uuid) is cached." - end - catch err - @info "Couldn't load $pk_name ($uuid) from file, will recache." - end - else - @info "Package $pk_name ($uuid) is cached." 
- end - else - @info "Will cache package $pk_name ($uuid)" - push!(packages_to_load, uuid) - end -end - -visited = Base.IdSet{Module}([Base, Core]) - -# Load all packages together -for (i, uuid) in enumerate(packages_to_load) - load_package(ctx, uuid, conn, LoadingBay, round(Int, 100*(i - 1)/length(packages_to_load))) - - # XXX: The following *may* duplicate some work, but we want to make sure that interrupts of - # the SymbolServer process don't invalidate *all* work done (which would happen when only - # writing the cache files out after all packages are loaded) - - # Create image of whole package env. This creates the module structure only. - env_symbols = getenvtree() - - # Populate the above with symbols, skipping modules that don't need caching. - # symbols (env_symbols) - # don't need to cache these each time... - for (pid, m) in Base.loaded_modules - if pid.uuid !== nothing && - is_stdlib(pid.uuid) && - isinmanifest(ctx, pid.uuid) && - isfile(joinpath(server.storedir, SymbolServer.get_cache_path(manifest(ctx), pid.uuid)...)) - push!(visited, m) - delete!(env_symbols, Symbol(pid.name)) - end - end - - symbols(env_symbols, nothing, getallns(), visited) - - # Wrap the `ModuleStore`s as `Package`s. - for (pkg_name, cache) in env_symbols - !isinmanifest(ctx, String(pkg_name)) && continue - uuid = packageuuid(ctx, String(pkg_name)) - pe = frommanifest(ctx, uuid) - server.depot[uuid] = Package(String(pkg_name), cache, uuid, sha_pkg(pe)) - end - - write_depot(server, server.context, written_caches) -end - -@info "Symbol server indexing took $((time_ns() - start_time) / 1e9) seconds." 
+SymbolServer.index_packages(conn, store_path) println(conn, "DONE") close(conn) - -end From b39560d45346da982c3949cb9cd44fdea2410b23 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Wed, 17 May 2023 18:39:14 -0600 Subject: [PATCH 10/14] Better log message in write_cache --- src/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.jl b/src/utils.jl index 685c8b41..9b715187 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -637,7 +637,7 @@ end function write_cache(uuid, pkg::Package, outpath) mkpath(dirname(outpath)) - @info "Now writing to disk $uuid" + @info "Now writing to disk $(pkg.name) ($uuid)" open(outpath, "w") do io CacheStore.write(io, pkg) end From fc46b37451937b16433cc8e0e8c7c2ee3d27d554 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Thu, 18 May 2023 03:37:14 -0600 Subject: [PATCH 11/14] Warn on exception in load_package --- src/utils.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utils.jl b/src/utils.jl index 9b715187..088f0bf6 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -629,7 +629,8 @@ function load_package(c::Pkg.Types.Context, uuid, conn, loadingbay, percentage = loadingbay.eval(:(import $(Symbol(pe_name)))) conn !== nothing && println(conn, "STOPLOAD;$pe_name") m = getfield(loadingbay, Symbol(pe_name)) - catch + catch e + @warn "Exception loading $uuid: $e" return end end From cfe81ec7830e6d8881191b3af7b35f143e4cd3eb Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Thu, 18 May 2023 03:37:44 -0600 Subject: [PATCH 12/14] Fix index_packages loading bay and improve perf --- src/SymbolServer.jl | 69 ++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/src/SymbolServer.jl b/src/SymbolServer.jl index 4fa6533c..581f8714 100644 --- a/src/SymbolServer.jl +++ b/src/SymbolServer.jl @@ -303,8 +303,6 @@ function index_package( return 37 end -module LoadingBay end - # Method to check whether a package is part of the standard library 
and so # won't need recaching. function is_stdlib(uuid::UUID) @@ -315,7 +313,7 @@ function is_stdlib(uuid::UUID) end end -function index_packages(conn, store_path::String) +function index_packages(conn, store_path::String, loadingbay) start_time = time_ns() ctx = try @@ -367,45 +365,46 @@ function index_packages(conn, store_path::String) visited = Base.IdSet{Module}([Base, Core]) - @debug "Packages to load: $packages_to_load" - # Load all packages together for (i, uuid) in enumerate(packages_to_load) - load_package(ctx, uuid, conn, LoadingBay, round(Int, 100*(i - 1)/length(packages_to_load))) - - # XXX: The following *may* duplicate some work, but we want to make sure that interrupts of - # the SymbolServer process don't invalidate *all* work done (which would happen when only - # writing the cache files out after all packages are loaded) - - # Create image of whole package env. This creates the module structure only. - env_symbols = getenvtree() - - # Populate the above with symbols, skipping modules that don't need caching. - # symbols (env_symbols) - # don't need to cache these each time... - for (pid, m) in Base.loaded_modules - if pid.uuid !== nothing && - is_stdlib(pid.uuid) && - isinmanifest(ctx, pid.uuid) && - isfile(joinpath(server.storedir, SymbolServer.get_cache_path(manifest(ctx), pid.uuid)...)) - push!(visited, m) - delete!(env_symbols, Symbol(pid.name)) - end - end - - symbols(env_symbols, nothing, getallns(), visited) + @info "Loading: $uuid" + load_package(ctx, uuid, conn, loadingbay, round(Int, 100*(i - 1)/length(packages_to_load))) + end - # Wrap the `ModuleStore`s as `Package`s. - for (pkg_name, cache) in env_symbols - !isinmanifest(ctx, String(pkg_name)) && continue - uuid = packageuuid(ctx, String(pkg_name)) - pe = frommanifest(ctx, uuid) - server.depot[uuid] = Package(String(pkg_name), cache, uuid, sha_pkg(pe)) + # This used to run all of the following *inside* the loop over packages_to_load above.
+ # This duplicated a lot of work; moving it outside the loop made the time go from 109.1 seconds to 12.2 seconds + # for indexing an environment with only "Plots". + # The old method, while inefficient, allowed SymbolServer to write its work periodically, so some symbol cache files + # could be written even if the symbol server was killed while working. + # To get the best of both worlds, it would be best to refactor to actually process package-by-package, rather + # than operating globally with getenvtree(), getallns(), etc. + + # Create image of whole package env. This creates the module structure only. + env_symbols = getenvtree() + + # Populate the above with symbols, skipping modules that don't need caching. + for (pid, m) in Base.loaded_modules + if pid.uuid !== nothing && + is_stdlib(pid.uuid) && + isinmanifest(ctx, pid.uuid) && + isfile(joinpath(server.storedir, SymbolServer.get_cache_path(manifest(ctx), pid.uuid)...)) + push!(visited, m) + delete!(env_symbols, Symbol(pid.name)) end + end - write_depot(server, server.context, written_caches) + symbols(env_symbols, nothing, getallns(), visited) + + # Wrap the `ModuleStore`s as `Package`s. + for (pkg_name, cache) in env_symbols + !isinmanifest(ctx, String(pkg_name)) && continue + uuid = packageuuid(ctx, String(pkg_name)) + pe = frommanifest(ctx, uuid) + server.depot[uuid] = Package(String(pkg_name), cache, uuid, sha_pkg(pe)) end + write_depot(server, server.context, written_caches) + @info "Symbol server indexing took $((time_ns() - start_time) / 1e9) seconds." 
end From 52d46776c970c155186584154fdfc1bebc589b0d Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Fri, 19 May 2023 04:08:00 -0600 Subject: [PATCH 13/14] Don't try to do load_core when running through server.jl --- src/SymbolServer.jl | 4 +++- src/server.jl | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/SymbolServer.jl b/src/SymbolServer.jl index 581f8714..79457524 100644 --- a/src/SymbolServer.jl +++ b/src/SymbolServer.jl @@ -408,7 +408,9 @@ function index_packages(conn, store_path::String, loadingbay) @info "Symbol server indexing took $((time_ns() - start_time) / 1e9) seconds." end -const stdlibs = load_core() +if !haskey(ENV, "SKIP_LOAD_CORE") + const stdlibs = load_core() +end function _precompile_() ccall(:jl_generating_output, Cint, ()) == 1 || return nothing diff --git a/src/server.jl b/src/server.jl index 7a5f4323..4b2cfe19 100644 --- a/src/server.jl +++ b/src/server.jl @@ -1,5 +1,8 @@ import Sockets + +ENV["SKIP_LOAD_CORE"] = "true" + import SymbolServer !in("@stdlib", LOAD_PATH) && push!(LOAD_PATH, "@stdlib") # Make sure we can load stdlibs @@ -23,7 +26,9 @@ end store_path = length(ARGS) > 0 ? ARGS[1] : abspath(joinpath(@__DIR__, "..", "store")) -SymbolServer.index_packages(conn, store_path) +module LoadingBay end + +SymbolServer.index_packages(conn, store_path, LoadingBay) println(conn, "DONE") close(conn) From 1badb724cebef0ae867c8c1f73cb08efe5b6e291 Mon Sep 17 00:00:00 2001 From: Tom McLaughlin Date: Fri, 19 May 2023 04:39:52 -0600 Subject: [PATCH 14/14] Switch to Julia 1.6 compatible @time calls --- src/SymbolServer.jl | 10 +++++----- src/indexpackage.jl | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/SymbolServer.jl b/src/SymbolServer.jl index 79457524..eb9e2db2 100644 --- a/src/SymbolServer.jl +++ b/src/SymbolServer.jl @@ -267,13 +267,13 @@ function index_package( store_path::String, m::Module ) - @time "Indexing package $name $version..." 
begin + @time begin # Get the symbols - env = @time "getenvtree" getenvtree([name]) - @time "symbols" symbols(env, m, get_return_type=true) + env = @time getenvtree([name]) + @time symbols(env, m, get_return_type=true) # Strip out paths - @time "modify_dirs" begin + @time begin modify_dirs( env[name], f -> modify_dir(f, pkg_src_dir(Base.loaded_modules[Base.PkgId(uuid, string(name))]), "PLACEHOLDER") @@ -289,7 +289,7 @@ function index_package( mkpath(dir) - @time "CacheStore.write" begin + @time begin filename_with_extension = "v$(replace(string(version), '+'=>'_'))_$treehash.jstore" open(joinpath(dir, filename_with_extension), "w") do io CacheStore.write(io, Package(string(name), env[name], uuid, nothing)) diff --git a/src/indexpackage.jl b/src/indexpackage.jl index a6eb3fad..dbf94693 100644 --- a/src/indexpackage.jl +++ b/src/indexpackage.jl @@ -12,7 +12,7 @@ if abspath(PROGRAM_FILE) == @__FILE__ # Load package m = try - @time "Loading $name $version" begin + @time begin LoadingBay.eval(:(import $name)) getfield(LoadingBay, name) end