mirror of
https://github.com/huggingface/xet-core.git
synced 2026-06-04 13:30:29 +08:00
Upgrade package dependencies (#644)
This PR updates every package dependency that can be upgraded without significant API breakage to its current version. The package versions in hf_xet_wasm and hf_xet are also updated to match those in the base package. There should be no functional change.
This commit is contained in:
1943
Cargo.lock
generated
1943
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
51
Cargo.toml
51
Cargo.toml
@@ -40,40 +40,41 @@ axum = "0.8"
|
||||
async-trait = "0.1"
|
||||
base64 = "0.22"
|
||||
bincode = "1.3"
|
||||
bitflags = { version = "2.9", features = ["serde"] }
|
||||
blake3 = "1.5"
|
||||
bytes = "1.8"
|
||||
bitflags = { version = "2.10", features = ["serde"] }
|
||||
blake3 = "1.8"
|
||||
bytemuck = "1"
|
||||
bytes = "1.11"
|
||||
chrono = "0.4"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
colored = "2"
|
||||
colored = "3"
|
||||
console-subscriber = "0.5"
|
||||
countio = { version = "0.2", features = ["futures"] }
|
||||
crc32fast = "1.4"
|
||||
countio = { version = "0.3", features = ["futures"] }
|
||||
crc32fast = "1.5"
|
||||
csv = "1"
|
||||
ctor = "0.4"
|
||||
ctor = "0.6"
|
||||
derivative = "2.2"
|
||||
dirs = "6.0"
|
||||
duration-str = "0.17"
|
||||
duration-str = "0.19"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
gearhash = "0.1"
|
||||
getrandom = "0.3"
|
||||
getrandom = { version = "0.4", features = ["wasm_js"] }
|
||||
git-url-parse = "0.4"
|
||||
git-version = "0.3"
|
||||
git2 = "0.20"
|
||||
half = "2.4"
|
||||
half = "2.7"
|
||||
heapify = "0.2"
|
||||
heed = "0.11"
|
||||
heed = "0.22"
|
||||
http = "1"
|
||||
hyper = "1.7"
|
||||
hyper = "1.8"
|
||||
hyper-util = "0.1"
|
||||
itertools = "0.14"
|
||||
lazy_static = "1.5"
|
||||
libc = "0.2"
|
||||
lz4_flex = "0.11"
|
||||
mockall = "0.13"
|
||||
lz4_flex = "0.12"
|
||||
mockall = "0.14"
|
||||
more-asserts = "0.3"
|
||||
once_cell = "1.20"
|
||||
once_cell = "1.21"
|
||||
oneshot = "0.1"
|
||||
paste = "1.0"
|
||||
pin-project = "1"
|
||||
@@ -81,14 +82,14 @@ prometheus = "0.14"
|
||||
rand = "0.9"
|
||||
rand_chacha = "0.9"
|
||||
regex = "1"
|
||||
reqwest = { version = "0.12", features = [
|
||||
reqwest = { version = "0.13.1", features = [
|
||||
"json",
|
||||
"stream",
|
||||
"system-proxy",
|
||||
"socks",
|
||||
], default-features = false }
|
||||
reqwest-middleware = "0.4"
|
||||
reqwest-retry = "0.7"
|
||||
reqwest-middleware = "0.5"
|
||||
reqwest-retry = "0.9"
|
||||
rust-netrc = "0.1"
|
||||
safe-transmute = "0.11"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
@@ -98,11 +99,11 @@ sha2 = "0.10"
|
||||
shell-words = "1.1"
|
||||
shellexpand = "3.1"
|
||||
static_assertions = "1.1"
|
||||
statrs = "0.16"
|
||||
sysinfo = "0.37"
|
||||
tempfile = "3.20"
|
||||
statrs = "0.18"
|
||||
sysinfo = "0.38"
|
||||
tempfile = "3.25"
|
||||
thiserror = "2.0"
|
||||
tokio = { version = "1.47" }
|
||||
tokio = { version = "1.49" }
|
||||
tokio-retry = "0.3"
|
||||
tokio-util = { version = "0.7" }
|
||||
tower-service = "0.3"
|
||||
@@ -115,9 +116,9 @@ url = "2.5"
|
||||
urlencoding = "2.1"
|
||||
uuid = "1"
|
||||
walkdir = "2"
|
||||
warp = "0.3"
|
||||
warp = { version = "0.4", features = ["server"] }
|
||||
web-time = "1.1"
|
||||
whoami = "1"
|
||||
whoami = "2"
|
||||
|
||||
# windows
|
||||
winapi = { version = "0.3", features = [
|
||||
@@ -130,7 +131,7 @@ winapi = { version = "0.3", features = [
|
||||
|
||||
# dev-deps
|
||||
approx = "0.5"
|
||||
httpmock = "0.7"
|
||||
httpmock = "0.8"
|
||||
rand_core = "0.6"
|
||||
rand_distr = "0.5"
|
||||
russh = "0.55"
|
||||
|
||||
@@ -48,21 +48,19 @@ strict = []
|
||||
default = ["rustls-tls"]
|
||||
elevated_information_level = []
|
||||
|
||||
# Three options for compilation here.
|
||||
rustls-tls = [
|
||||
"reqwest/rustls-tls",
|
||||
"reqwest/rustls-tls-webpki-roots",
|
||||
"reqwest/rustls-tls-native-roots",
|
||||
]
|
||||
# rustls-tls uses the rustls package with rustls-platform-verifier, which verifies TLS
|
||||
# certificates using the platform's native certificate store (Security.framework on macOS,
|
||||
# SChannel on Windows, system cert paths on Linux). Falls back to bundled webpki-roots
|
||||
# when the system store is unavailable (e.g. minimal containers).
|
||||
rustls-tls = ["reqwest/rustls"]
|
||||
|
||||
# rustls-tls uses the rustls package, which embeds all of the ssl stuff in a rust package. This is the
|
||||
# most portable option, but also may not respect local network configurations. Use this if the native-ssl options don't work.
|
||||
# Uses native tls in the request package; this uses the native-tls package to wrap openssl, which is a more robust and portable
|
||||
# way of ensuring that tls just works.
|
||||
# native-tls wraps the platform's TLS library (OpenSSL on Linux, SChannel on Windows,
|
||||
# Security.framework on macOS). Use this if rustls causes issues in specific environments.
|
||||
native-tls = ["reqwest/native-tls"]
|
||||
|
||||
# This uses the above, but statically compiles in openssl, which makes the result more portable at the expense of
|
||||
# library size.
|
||||
# Same as native-tls but statically compiles OpenSSL, making the result more portable
|
||||
# at the expense of binary size. Useful for manylinux builds where system OpenSSL may
|
||||
# not be available.
|
||||
native-tls-vendored = ["reqwest/native-tls-vendored"]
|
||||
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ pub struct LocalClient {
|
||||
// because heed holds file handles that need to be closed before the directory is deleted.
|
||||
// We use Option<heed::Env> so we can take() it in Drop to properly close via prepare_for_closing.
|
||||
global_dedup_db_env: Option<heed::Env>,
|
||||
global_dedup_table: heed::Database<OwnedType<MerkleHash>, OwnedType<MerkleHash>>,
|
||||
global_dedup_table: heed::Database<SerdeBincode<MerkleHash>, SerdeBincode<MerkleHash>>,
|
||||
shard_manager: Arc<ShardFileManager>,
|
||||
xorb_dir: PathBuf,
|
||||
shard_dir: PathBuf,
|
||||
@@ -104,13 +104,18 @@ impl LocalClient {
|
||||
}
|
||||
|
||||
// Open / set up the global dedup lookup.
|
||||
let global_dedup_db_env = heed::EnvOpenOptions::new()
|
||||
.open(&global_dedup_dir)
|
||||
let global_dedup_db_env = unsafe { heed::EnvOpenOptions::new().open(&global_dedup_dir) }
|
||||
.map_err(|e| CasClientError::Other(format!("Error opening db at {global_dedup_dir:?}: {e}")))?;
|
||||
|
||||
let mut write_txn = global_dedup_db_env
|
||||
.write_txn()
|
||||
.map_err(|e| CasClientError::Other(format!("Error opening heed write transaction: {e}")))?;
|
||||
let global_dedup_table = global_dedup_db_env
|
||||
.create_database(None)
|
||||
.create_database(&mut write_txn, None)
|
||||
.map_err(|e| CasClientError::Other(format!("Error opening heed table: {e}")))?;
|
||||
write_txn
|
||||
.commit()
|
||||
.map_err(|e| CasClientError::Other(format!("Error committing heed database: {e}")))?;
|
||||
|
||||
// Open / set up the shard lookup
|
||||
let shard_manager = ShardFileManager::new_in_session_directory(shard_dir.clone(), true).await?;
|
||||
|
||||
@@ -213,11 +213,12 @@ pub fn create_file(path: impl AsRef<Path>) -> std::io::Result<File> {
|
||||
fn permission_warning(path: &Path, recursive: bool) {
|
||||
#[cfg(unix)]
|
||||
{
|
||||
let username = whoami::username().unwrap_or_else(|_| "unknown".to_string());
|
||||
let message = format!(
|
||||
"The process doesn't have correct read-write permission into path {path:?}, please resets
|
||||
ownership by 'sudo chown{}{} {path:?}'.",
|
||||
if recursive { " -R " } else { " " },
|
||||
whoami::username()
|
||||
username
|
||||
);
|
||||
|
||||
eprintln!("{}", message.bright_blue());
|
||||
|
||||
2128
hf_xet/Cargo.lock
generated
2128
hf_xet/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -33,7 +33,7 @@ pyo3 = { version = "0.26", features = [
|
||||
"abi3-py37",
|
||||
"auto-initialize",
|
||||
] }
|
||||
rand = "0.9.2"
|
||||
rand = "0.9"
|
||||
tracing = "0.1"
|
||||
|
||||
# Unix-specific dependencies
|
||||
|
||||
584
hf_xet_thin_wasm/Cargo.lock
generated
584
hf_xet_thin_wasm/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -11,6 +11,10 @@ deduplication = { path = "../deduplication" }
|
||||
mdb_shard = { path = "../mdb_shard" }
|
||||
merklehash = { path = "../merklehash" }
|
||||
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
getrandom = { version = "0.3", features = ["wasm_js"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde-wasm-bindgen = "0.6.5"
|
||||
wasm-bindgen = "=0.2.100"
|
||||
|
||||
[package.metadata.cargo-machete]
|
||||
ignored = ["getrandom"]
|
||||
|
||||
1150
hf_xet_wasm/Cargo.lock
generated
1150
hf_xet_wasm/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -15,25 +15,26 @@ merklehash = { path = "../merklehash" }
|
||||
progress_tracking = { path = "../progress_tracking" }
|
||||
utils = { path = "../utils" }
|
||||
|
||||
async-trait = "0.1.88"
|
||||
bytes = "1.10.1"
|
||||
async-trait = "0.1"
|
||||
bytes = "1.11"
|
||||
console_error_panic_hook = "0.1.7"
|
||||
console_log = { version = "1.0.0", features = ["color"] }
|
||||
env_logger = "0.11.5"
|
||||
futures = "0.3.31"
|
||||
js-sys = "0.3.72"
|
||||
log = "0.4.22"
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
env_logger = "0.11"
|
||||
futures = "0.3"
|
||||
getrandom = { version = "0.3", features = ["wasm_js"] }
|
||||
js-sys = "0.3.77"
|
||||
log = "0.4"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde-wasm-bindgen = "0.6.5"
|
||||
sha2 = { version = "0.10.8", features = ["asm"] }
|
||||
sha2 = { version = "0.10", features = ["asm"] }
|
||||
thiserror = "2.0"
|
||||
tokio = { version = "1.44", features = ["sync", "rt"] }
|
||||
tokio_with_wasm = { version = "0.8.2", features = ["rt"] }
|
||||
tokio = { version = "1.49", features = ["sync", "rt"] }
|
||||
tokio_with_wasm = { version = "0.8", features = ["rt"] }
|
||||
uuid = { version = "1", features = ["v4", "js"] }
|
||||
wasm_thread = "0.3"
|
||||
wasm-bindgen = "=0.2.100"
|
||||
wasm-bindgen-futures = "0.4.50"
|
||||
web-sys = { version = "0.3.72", features = [
|
||||
web-sys = { version = "0.3.77", features = [
|
||||
"File",
|
||||
"ReadableStream",
|
||||
"ReadableStreamDefaultReader",
|
||||
@@ -58,5 +59,8 @@ web-sys = { version = "0.3.72", features = [
|
||||
[package.metadata.docs.rs]
|
||||
targets = ["wasm32-unknown-unknown"]
|
||||
|
||||
[package.metadata.cargo-machete]
|
||||
ignored = ["getrandom"]
|
||||
|
||||
[dev-dependencies]
|
||||
wasm-bindgen-test = "0.3.50"
|
||||
|
||||
@@ -12,7 +12,19 @@ set -ex
|
||||
# features enabled, ensuring that LLVM will generate atomic instructions,
|
||||
# shared memory, passive segments, etc.
|
||||
|
||||
RUSTFLAGS="-C target-feature=+atomics,+bulk-memory,+mutable-globals \
|
||||
WASM_BINDGEN_VERSION="0.2.100"
|
||||
|
||||
if command -v wasm-bindgen >/dev/null 2>&1; then
|
||||
INSTALLED_WASM_BINDGEN_VERSION="$(wasm-bindgen --version | awk '{print $2}')"
|
||||
else
|
||||
INSTALLED_WASM_BINDGEN_VERSION=""
|
||||
fi
|
||||
|
||||
if [ "$INSTALLED_WASM_BINDGEN_VERSION" != "$WASM_BINDGEN_VERSION" ]; then
|
||||
cargo install -f wasm-bindgen-cli --version "$WASM_BINDGEN_VERSION"
|
||||
fi
|
||||
|
||||
TARGET_RUSTFLAGS="-C target-feature=+atomics,+bulk-memory,+mutable-globals \
|
||||
-C link-arg=--shared-memory \
|
||||
-C link-arg=--max-memory=1073741824 \
|
||||
-C link-arg=--import-memory \
|
||||
@@ -21,6 +33,7 @@ RUSTFLAGS="-C target-feature=+atomics,+bulk-memory,+mutable-globals \
|
||||
-C link-arg=--export=__tls_align \
|
||||
-C link-arg=--export=__tls_base \
|
||||
--cfg getrandom_backend=\"wasm_js\"" \
|
||||
CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUSTFLAGS="$TARGET_RUSTFLAGS" \
|
||||
cargo +nightly build \
|
||||
--example simple \
|
||||
--target wasm32-unknown-unknown \
|
||||
@@ -28,7 +41,6 @@ cargo +nightly build \
|
||||
-Z build-std=std,panic_abort
|
||||
|
||||
# Generate JS/WASM glue for web targets
|
||||
RUSTFLAGS='--cfg getrandom_backend="wasm_js"' \
|
||||
wasm-bindgen \
|
||||
target/wasm32-unknown-unknown/release/examples/simple.wasm \
|
||||
--out-dir ./examples/target/ \
|
||||
|
||||
@@ -223,7 +223,7 @@ impl ShardFileManager {
|
||||
let mut new_shards = Vec::from(new_shards);
|
||||
|
||||
// Compare in reverse order to sort from newest to oldest
|
||||
new_shards.sort_by(|s1, s2| s2.last_modified_time.cmp(&s1.last_modified_time));
|
||||
new_shards.sort_by_key(|shard| std::cmp::Reverse(shard.last_modified_time));
|
||||
let num_shards = new_shards.len();
|
||||
|
||||
for s in new_shards {
|
||||
|
||||
@@ -12,6 +12,7 @@ safe-transmute = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
|
||||
[target.'cfg(not(target_family = "wasm"))'.dependencies]
|
||||
bytemuck = { workspace = true }
|
||||
heed = { workspace = true }
|
||||
|
||||
[target.'cfg(target_family = "wasm")'.dependencies]
|
||||
|
||||
@@ -10,6 +10,8 @@ use std::{fmt, str};
|
||||
use base64::Engine as _;
|
||||
// URL safe Base 64 encoding with ending characters removed.
|
||||
use base64::engine::general_purpose::URL_SAFE_NO_PAD;
|
||||
#[cfg(not(target_family = "wasm"))]
|
||||
use bytemuck::{Pod, Zeroable};
|
||||
use rand::rngs::SmallRng;
|
||||
use rand::{RngCore, SeedableRng};
|
||||
use safe_transmute::{transmute_to_bytes, transmute_to_bytes_mut};
|
||||
@@ -124,14 +126,14 @@ impl core::ops::Rem<u64> for DataHash {
|
||||
}
|
||||
|
||||
#[cfg(not(target_family = "wasm"))]
|
||||
unsafe impl heed::bytemuck::Zeroable for DataHash {
|
||||
unsafe impl Zeroable for DataHash {
|
||||
fn zeroed() -> Self {
|
||||
DataHash([0; 4])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_family = "wasm"))]
|
||||
unsafe impl heed::bytemuck::Pod for DataHash {}
|
||||
unsafe impl Pod for DataHash {}
|
||||
|
||||
/// The error type that is returned if [DataHash::from_hex] fails.
|
||||
#[derive(Debug, Clone)]
|
||||
|
||||
@@ -9,11 +9,11 @@ path = "src/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
utils = { path = "../utils" }
|
||||
konst = "0.3"
|
||||
const-str = "0.3"
|
||||
konst = "0.4"
|
||||
const-str = "1.1"
|
||||
|
||||
[dev-dependencies]
|
||||
serial_test = { workspace = true }
|
||||
|
||||
[features]
|
||||
no-default-cache = []
|
||||
no-default-cache = []
|
||||
|
||||
@@ -335,7 +335,7 @@ fn run_log_directory_cleanup(cfg: LogDirConfig, log_dir: &Path) -> io::Result<()
|
||||
let mut n_pruned = 0;
|
||||
if total_bytes - deleted_bytes > cfg.size_limit {
|
||||
// Sort by oldest first.
|
||||
candidates.sort_by(|a, b| b.age.cmp(&a.age));
|
||||
candidates.sort_by_key(|lf| std::cmp::Reverse(lf.age));
|
||||
for lf in &candidates {
|
||||
if total_bytes - deleted_bytes <= cfg.size_limit {
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user