Files
xet-core/Cargo.toml
Hoyt Koepke d575b593d4 Make hf_xet safe(ish) across python os.fork() (#437)
This PR ensures that none of the tokio thread state persists across a
call to Python's os.fork(), as used in the multiprocessing library. For
an explanation of the issue, see
https://github.com/vllm-project/vllm/blob/main/docs/design/multiprocessing.md#tradeoffs.

It does this by offloading all the async calls to a separate and
transient OS thread, which would not exist after the spawn process. Thus
any possible restart of the tokio runtime due to a spawn would occur in
a clean environment and without thread-local storage causing issues.

To accomplish this, this PR refactors the hf_xet logging layer to
separate it out from the python runtime, as the python runtime is not
Send/Sync. This also simplifies this layer somewhat and isolates the
telemetry reporting logic so that only the background sending thread of
the telemetry logic is restarted after a spawn.

In addition, this PR removes the use of parking_lot, both in
singleflight.rs and as part of tokio. The library is not safe across
fork(); in particular, note
9c810e4a11/core/src/parking_lot.rs (L51).
2025-08-05 15:23:48 -07:00

111 lines
2.0 KiB
TOML

# Workspace root manifest: lists the member crates and the directories
# that are explicitly kept out of the workspace.
[workspace]
# Use version 2 of Cargo's feature resolver.
resolver = "2"
members = [
    "cas_client",
    "cas_object",
    "cas_types",
    "chunk_cache",
    "data",
    "deduplication",
    "error_printer",
    "file_utils",
    "mdb_shard",
    "merklehash",
    "parutils",
    "progress_tracking",
    "utils",
    "xet_threadpool",
]
# These directories are not treated as workspace members.
exclude = [
    "chunk_cache_bench",
    "hf_xet",
    "hf_xet_thin_wasm",
    "hf_xet_wasm",
]

# Release builds: full optimization with link-time optimization enabled.
[profile.release]
# Level 1 keeps limited debug info in optimized binaries.
debug = 1
# `true` selects "fat" LTO across the whole dependency graph.
lto = true
opt-level = 3

# Custom profile: starts from the `dev` profile's settings, then overrides
# them to compile with full optimization and limited debug info.
# NOTE(review): presumably meant for running tests with optimizations on;
# confirm against the CI / build scripts.
[profile.opt-test]
inherits = "dev"
debug = 1
opt-level = 3

# Shared dependency versions for the workspace. Member crates can inherit an
# entry with `<name> = { workspace = true }` instead of repeating the version.
# Keep each group below sorted alphabetically.
[workspace.dependencies]
anyhow = "1"
async-scoped = { version = "0.7", features = ["use-tokio"] }
async-trait = "0.1"
base64 = "0.22"
bincode = "1.3"
bitflags = { version = "2.9", features = ["serde"] }
blake3 = "1.5"
bytes = "1.8"
chrono = "0.4"
clap = { version = "4", features = ["derive"] }
colored = "2"
countio = { version = "0.2", features = ["futures"] }
crc32fast = "1.4"
csv = "1"
ctor = "0.4"
derivative = "2.2.0"
dirs = "5.0"
futures = "0.3"
futures-util = "0.3"
gearhash = "0.1"
getrandom = "0.3"
half = "2.4"
heapify = "0.2"
heed = "0.11"
http = "1"
itertools = "0.14"
jsonwebtoken = "9.3"
lazy_static = "1.5"
libc = "0.2"
lz4_flex = "0.11"
mockall = "0.13"
more-asserts = "0.3"
once_cell = "1.20"
oneshot = "0.1.8"
pin-project = "1"
prometheus = "0.14"
rand = "0.9"
rand_chacha = "0.9"
rayon = "1.5"
regex = "1"
rustc-hash = "1.1"
safe-transmute = "0.11"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_repr = "0.1"
sha2 = "0.10"
shellexpand = "3.1.1"
static_assertions = "1.1"
tempfile = "3.20"
thiserror = "2.0"
tokio = "1.47"
tokio-retry = "0.3"
tokio-util = "0.7"
tracing = "0.1"
ulid = "1.2"
url = "2.5"
uuid = "1"
walkdir = "2"
web-time = "1.1.0"
whoami = "1"

# Windows
winapi = { version = "0.3", features = [
    "handleapi",
    "processthreadsapi",
    "securitybaseapi",
    "winerror",
    "winnt",
] }

# Dev-dependencies
criterion = { version = "0.5", features = ["html_reports"] }
httpmock = "0.7"
serial_test = "3"
tempdir = "0.3"
tracing-test = { version = "0.2", features = ["no-env-filter"] }
wiremock = "0.6"