mirror of
https://github.com/huggingface/xet-core.git
synced 2026-06-04 13:30:29 +08:00
This PR ensures that none of the tokio thread state persists across a
call to Python's os.fork(), as used by the multiprocessing library. For
an explanation of the issue, see
https://github.com/vllm-project/vllm/blob/main/docs/design/multiprocessing.md#tradeoffs.
It does this by offloading all the async calls to a separate, transient
OS thread, which would not exist in the newly spawned process. Thus any
restart of the tokio runtime triggered by a spawn would occur in a clean
environment, without thread-local storage causing issues.
To accomplish this, this PR refactors the hf_xet logging layer to
separate it out from the python runtime, as the python runtime is not
Send/Sync. This also simplifies this layer somewhat and isolates the
telemetry reporting logic so that only the background sending thread of
the telemetry logic is restarted after a spawn.
In addition, this PR removes the use of parking_lot, both in
singleflight.rs and as part of tokio. The library is not safe across
fork(); in particular, note
https://github.com/Amanieu/parking_lot/blob/9c810e4a11/core/src/parking_lot.rs#L51.
111 lines
2.0 KiB
TOML
111 lines
2.0 KiB
TOML
# Cargo workspace definition.
[workspace]
# Use version "2" of Cargo's feature resolver for every member crate.
resolver = "2"

# Crates that are built and tested as part of this workspace.
members = [
    "cas_client",
    "cas_object",
    "cas_types",
    "chunk_cache",
    "data",
    "deduplication",
    "error_printer",
    "file_utils",
    "mdb_shard",
    "merklehash",
    "parutils",
    "progress_tracking",
    "utils",
    "xet_threadpool",
]

# Directories explicitly kept out of the workspace.
exclude = ["chunk_cache_bench", "hf_xet", "hf_xet_wasm", "hf_xet_thin_wasm"]

# Settings applied to `--release` builds.
[profile.release]
# Maximum optimization level.
opt-level = 3
# `true` enables "fat" link-time optimization across the whole dependency graph.
lto = true
# Level 1 ("limited") debug info: no type- or variable-level information.
debug = 1

# Custom profile, selected with `--profile opt-test`.
[profile.opt-test]
# Start from the `dev` profile's settings, then override the keys below.
inherits = "dev"
# Maximum optimization level.
opt-level = 3
# Level 1 ("limited") debug info: no type- or variable-level information.
debug = 1

# Dependency versions shared across the workspace; a member crate opts in
# with `<name> = { workspace = true }`. Keys are sorted alphabetically
# within each group.
[workspace.dependencies]
anyhow = "1"
async-scoped = { version = "0.7", features = ["use-tokio"] }
async-trait = "0.1"
base64 = "0.22"
bincode = "1.3"
bitflags = { version = "2.9", features = ["serde"] }
blake3 = "1.5"
bytes = "1.8"
chrono = "0.4"
clap = { version = "4", features = ["derive"] }
colored = "2"
countio = { version = "0.2", features = ["futures"] }
crc32fast = "1.4"
csv = "1"
ctor = "0.4"
derivative = "2.2.0"
dirs = "5.0"
futures = "0.3"
futures-util = "0.3"
gearhash = "0.1"
getrandom = "0.3"
half = "2.4"
heapify = "0.2"
heed = "0.11"
http = "1"
itertools = "0.14"
jsonwebtoken = "9.3"
lazy_static = "1.5"
libc = "0.2"
lz4_flex = "0.11"
mockall = "0.13"
more-asserts = "0.3"
once_cell = "1.20"
oneshot = "0.1.8"
pin-project = "1"
prometheus = "0.14"
rand = "0.9"
rand_chacha = "0.9"
rayon = "1.5"
regex = "1"
rustc-hash = "1.1"
safe-transmute = "0.11"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_repr = "0.1"
sha2 = "0.10"
shellexpand = "3.1.1"
static_assertions = "1.1"
tempfile = "3.20"
thiserror = "2.0"
tokio = "1.47"
tokio-retry = "0.3"
tokio-util = "0.7"
tracing = "0.1"
ulid = "1.2"
url = "2.5"
uuid = "1"
walkdir = "2"
web-time = "1.1.0"
whoami = "1"

# Windows-specific dependencies
winapi = { version = "0.3", features = [
    "winerror",
    "winnt",
    "handleapi",
    "processthreadsapi",
    "securitybaseapi",
] }

# Development / test-only dependencies
criterion = { version = "0.5", features = ["html_reports"] }
httpmock = "0.7"
serial_test = "3"
# NOTE(review): the `tempdir` crate is deprecated upstream in favor of
# `tempfile` (already declared above); consider migrating its users.
tempdir = "0.3"
tracing-test = { version = "0.2", features = ["no-env-filter"] }
wiremock = "0.6"