From 74d7c5926c5172d495acea426a6a39ee2d675591 Mon Sep 17 00:00:00 2001 From: Di Xiao Date: Fri, 12 Dec 2025 02:55:28 +0800 Subject: [PATCH] Clean up dead code (#593) There have been many dead code left in xet-core due to `#![allow(dead_code)]` at a couple of places. This PR removes them and fix the corresponding linting errors. No functionality change. --- Cargo.lock | 38 --------------- Cargo.toml | 1 - .../src/adaptive_concurrency/controller.rs | 8 ++-- .../adaptive_concurrency/exp_weighted_olr.rs | 2 + .../adaptive_concurrency/rtt_prediction.rs | 1 + cas_client/src/download_utils.rs | 2 +- cas_client/src/lib.rs | 5 +- cas_client/src/local_client.rs | 3 +- cas_client/src/remote_client.rs | 14 +++--- .../test/adaptive_concurrency/src/common.rs | 1 - data/Cargo.toml | 1 - data/src/bin/xtool.rs | 1 - data/src/deduplication_interface.rs | 6 +-- data/src/file_cleaner.rs | 6 +-- data/src/file_downloader.rs | 4 +- data/src/file_upload_session.rs | 22 --------- data/src/lib.rs | 1 - data/src/remote_client_interface.rs | 1 - hf_xet/Cargo.lock | 48 ------------------- hf_xet_wasm/src/wasm_file_upload_session.rs | 1 - 20 files changed, 19 insertions(+), 147 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 466a692f..a676a923 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1258,7 +1258,6 @@ dependencies = [ "dirs", "error_printer", "hub_client", - "jsonwebtoken", "lazy_static", "mdb_shard", "merklehash", @@ -2751,21 +2750,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "jsonwebtoken" -version = "9.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" -dependencies = [ - "base64 0.22.1", - "js-sys", - "pem", - "ring", - "serde", - "serde_json", - "simple_asn1", -] - [[package]] name = "konst" version = "0.3.16" @@ -3666,16 +3650,6 @@ dependencies = [ "hmac", ] -[[package]] -name = "pem" -version = "3.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38af38e8470ac9dee3ce1bae1af9c1671fffc44ddfd8bd1d0a3445bf349a8ef3" -dependencies = [ - "base64 0.22.1", - "serde", -] - [[package]] name = "pem-rfc7468" version = "0.7.0" @@ -4978,18 +4952,6 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" -[[package]] -name = "simple_asn1" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" -dependencies = [ - "num-bigint", - "num-traits", - "thiserror 2.0.12", - "time", -] - [[package]] name = "siphasher" version = "1.0.1" diff --git a/Cargo.toml b/Cargo.toml index c471f41d..17231726 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,7 +64,6 @@ http = "1" hyper = "1.7" hyper-util = "0.1" itertools = "0.14" -jsonwebtoken = "9.3" lazy_static = "1.5" libc = "0.2" lz4_flex = "0.11" diff --git a/cas_client/src/adaptive_concurrency/controller.rs b/cas_client/src/adaptive_concurrency/controller.rs index b4b384bb..15f14a50 100644 --- a/cas_client/src/adaptive_concurrency/controller.rs +++ b/cas_client/src/adaptive_concurrency/controller.rs @@ -314,7 +314,7 @@ impl AdaptiveConcurrencyController { } pub async fn acquire_connection_permit(self: &Arc) -> Result { - let permit = self.concurrency_semaphore.acquire().await?; + let _permit = self.concurrency_semaphore.acquire().await?; let info = Arc::new(ConnectionPermitInfo { controller: Arc::clone(self), @@ -325,7 +325,7 @@ impl AdaptiveConcurrencyController { last_partial_report_ms: AtomicU64::new(0), }); - Ok(ConnectionPermit { permit, info }) + Ok(ConnectionPermit { _permit, info }) } /// The current concurrency; there may be more permits out there due to the lazy resolution of decrements, but those @@ -547,7 +547,7 @@ pub struct ConnectionPermitInfo { /// A permit for a connection. This can be used to track the start time of a transfer and report back /// to the original controller whether it's needed. pub struct ConnectionPermit { - permit: AdjustableSemaphorePermit, + _permit: AdjustableSemaphorePermit, info: Arc, } @@ -661,7 +661,6 @@ mod test_constants { pub const INCR_SPACING_MS: u64 = 200; pub const DECR_SPACING_MS: u64 = 100; - pub const TARGET_TIME_MS_S: u64 = 5; pub const TARGET_TIME_MS_L: u64 = 20; pub const LARGE_N_BYTES: u64 = 10000; @@ -708,7 +707,6 @@ mod tests { use super::test_constants::*; use super::*; - pub const B: u64 = 1000; // Use a larger transfer size for tests to ensure the RTT predictor has enough data pub const TEST_TRANSFER_SIZE: u64 = 10 * 1024 * 1024; // 10MB diff --git a/cas_client/src/adaptive_concurrency/exp_weighted_olr.rs b/cas_client/src/adaptive_concurrency/exp_weighted_olr.rs index 27f4c0f4..946cfdc9 100644 --- a/cas_client/src/adaptive_concurrency/exp_weighted_olr.rs +++ b/cas_client/src/adaptive_concurrency/exp_weighted_olr.rs @@ -111,6 +111,7 @@ impl ExpWeightedOnlineLinearRegression { } /// Optionally: expose current coefficients (beta0, beta1) if desired. + #[allow(dead_code)] pub fn coefficients(&self) -> Option<(f64, f64)> { let delta = self.sw * self.sxx - self.sx * self.sx; if delta.abs() < 1e-12 { @@ -125,6 +126,7 @@ impl ExpWeightedOnlineLinearRegression { /// Check if two models are approximately equal for testing purposes. /// /// Compares all internal state (sufficient statistics) with a tolerance. + #[cfg(test)] pub fn approx_equals(&self, other: &Self, epsilon: f64) -> bool { (self.sw - other.sw).abs() < epsilon && (self.sx - other.sx).abs() < epsilon diff --git a/cas_client/src/adaptive_concurrency/rtt_prediction.rs b/cas_client/src/adaptive_concurrency/rtt_prediction.rs index f933a7f5..cda72f38 100644 --- a/cas_client/src/adaptive_concurrency/rtt_prediction.rs +++ b/cas_client/src/adaptive_concurrency/rtt_prediction.rs @@ -106,6 +106,7 @@ impl RTTPredictor { /// /// - `size_bytes`: the size of the transfer. /// - `avg_concurrent`: the number of concurrent connections. + #[cfg(test)] pub fn prediction_standard_error(&self, size_bytes: u64, avg_concurrent: f64) -> Option { self.predict(size_bytes, avg_concurrent).1 } diff --git a/cas_client/src/download_utils.rs b/cas_client/src/download_utils.rs index f7886e9a..89b40275 100644 --- a/cas_client/src/download_utils.rs +++ b/cas_client/src/download_utils.rs @@ -418,7 +418,7 @@ impl DownloadSegmentLengthTuner { if metrics.n_retries_on_403 > 0 { if *num_range_in_segment > 1 { - let delta = xet_config().client.num_range_in_segment_delta.min(*num_range_in_segment - 1); + let delta = self.delta.min(*num_range_in_segment - 1); info!("detected retries on 403, shrinking segment size by {delta} ranges"); *num_range_in_segment -= delta; } else { diff --git a/cas_client/src/lib.rs b/cas_client/src/lib.rs index 93b3b210..47c0e218 100644 --- a/cas_client/src/lib.rs +++ b/cas_client/src/lib.rs @@ -1,5 +1,3 @@ -#![allow(dead_code)] - pub use chunk_cache::CacheConfig; pub use http_client::{Api, ResponseErrorLogger, RetryConfig, build_auth_http_client, build_http_client}; pub use interface::Client; @@ -11,6 +9,7 @@ pub use remote_client::RemoteClient; pub use crate::error::CasClientError; +pub mod adaptive_concurrency; #[cfg(not(target_family = "wasm"))] mod download_utils; mod error; @@ -24,5 +23,3 @@ mod output_provider; pub mod remote_client; pub mod retry_wrapper; pub mod upload_progress_stream; - -pub mod adaptive_concurrency; diff --git a/cas_client/src/local_client.rs b/cas_client/src/local_client.rs index d6dfd9e1..e4417bfb 100644 --- a/cas_client/src/local_client.rs +++ b/cas_client/src/local_client.rs @@ -28,7 +28,6 @@ use crate::{Client, SeekingOutputProvider, SequentialOutput}; pub struct LocalClient { tmp_dir: Option, // To hold directory to use for local testing - base_dir: PathBuf, xorb_dir: PathBuf, shard_dir: PathBuf, shard_manager: Arc, @@ -88,7 +87,6 @@ impl LocalClient { Ok(Self { tmp_dir: None, - base_dir, shard_dir, xorb_dir, shard_manager, @@ -199,6 +197,7 @@ impl LocalClient { Ok(ret) } + #[cfg(test)] fn get_length(&self, hash: &MerkleHash) -> Result { let file_path = self.get_path_for_entry(hash); match File::open(file_path) { diff --git a/cas_client/src/remote_client.rs b/cas_client/src/remote_client.rs index 68bea221..39a4b8c8 100644 --- a/cas_client/src/remote_client.rs +++ b/cas_client/src/remote_client.rs @@ -1,6 +1,5 @@ use std::collections::HashMap; use std::mem::take; -use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; @@ -61,7 +60,6 @@ pub struct RemoteClient { chunk_cache: Option>, #[cfg(not(target_family = "wasm"))] range_download_single_flight: RangeDownloadSingleFlight, - shard_cache_directory: Option, upload_concurrency_controller: Arc, } @@ -191,7 +189,6 @@ impl RemoteClient { endpoint: &str, auth: &Option, cache_config: &Option, - shard_cache_directory: Option, session_id: &str, dry_run: bool, user_agent: &str, @@ -226,7 +223,6 @@ impl RemoteClient { chunk_cache, #[cfg(not(target_family = "wasm"))] range_download_single_flight: Arc::new(Group::new()), - shard_cache_directory, upload_concurrency_controller: AdaptiveConcurrencyController::new_upload("upload"), } } @@ -281,6 +277,7 @@ impl RemoteClient { #[cfg(not(target_family = "wasm"))] impl RemoteClient { #[instrument(skip_all, name = "RemoteClient::batch_get_reconstruction")] + #[allow(dead_code)] async fn batch_get_reconstruction( &self, file_ids: impl Iterator, @@ -310,7 +307,8 @@ impl RemoteClient { .run_and_extract_json(move |_partial_report_fn| client.get(url.clone()).with_extension(Api(api_tag)).send()) .await?; - info!(call_id, + info!( + call_id, file_ids=?file_id_list, response_count=response.files.len(), "Completed batch_get_reconstruction API call", @@ -942,7 +940,7 @@ mod tests { let raw_xorb = build_raw_xorb(3, ChunkSize::Random(512, 10248)); let threadpool = XetRuntime::new().unwrap(); - let client = RemoteClient::new(CAS_ENDPOINT, &None, &None, None, "", false, ""); + let client = RemoteClient::new(CAS_ENDPOINT, &None, &None, "", false, ""); let cas_object = build_and_verify_cas_object(raw_xorb, Some(CompressionScheme::LZ4)); @@ -1325,7 +1323,7 @@ mod tests { // test reconstruct and sequential write let test = test_case.clone(); - let client = RemoteClient::new(endpoint, &None, &None, None, "", false, ""); + let client = RemoteClient::new(endpoint, &None, &None, "", false, ""); let buf = ThreadSafeBuffer::default(); let provider = SequentialOutput::from(buf.clone()); let resp = threadpool.external_run_async_task(async move { @@ -1347,7 +1345,7 @@ mod tests { // test reconstruct and parallel write let test = test_case; - let client = RemoteClient::new(endpoint, &None, &None, None, "", false, ""); + let client = RemoteClient::new(endpoint, &None, &None, "", false, ""); let buf = ThreadSafeBuffer::default(); let provider = SeekingOutputProvider::from(buf.clone()); let resp = threadpool.external_run_async_task(async move { diff --git a/cas_client/test/adaptive_concurrency/src/common.rs b/cas_client/test/adaptive_concurrency/src/common.rs index 6f1b7a23..5aa97651 100644 --- a/cas_client/test/adaptive_concurrency/src/common.rs +++ b/cas_client/test/adaptive_concurrency/src/common.rs @@ -1,4 +1,3 @@ -#![allow(dead_code)] // Re-export the NetworkModelState from the adaptive_concurrency module pub use cas_client::adaptive_concurrency::{CCLatencyModelState, CCSuccessModelState}; use serde::{Deserialize, Serialize}; diff --git a/data/Cargo.toml b/data/Cargo.toml index fef93324..b539655b 100644 --- a/data/Cargo.toml +++ b/data/Cargo.toml @@ -45,7 +45,6 @@ async-trait = { workspace = true } bytes = { workspace = true } chrono = { workspace = true } clap = { workspace = true } -jsonwebtoken = { workspace = true } lazy_static = { workspace = true } more-asserts = { workspace = true } prometheus = { workspace = true } diff --git a/data/src/bin/xtool.rs b/data/src/bin/xtool.rs index 0482f62b..7013526c 100644 --- a/data/src/bin/xtool.rs +++ b/data/src/bin/xtool.rs @@ -218,7 +218,6 @@ async fn query_reconstruction( &jwt_info.cas_url, &cas_storage_config.auth, &Some(cas_storage_config.cache_config.clone()), - Some(config.shard_config.cache_directory.clone()), "", true, &cas_storage_config.user_agent, diff --git a/data/src/deduplication_interface.rs b/data/src/deduplication_interface.rs index f38d4d96..0d636f76 100644 --- a/data/src/deduplication_interface.rs +++ b/data/src/deduplication_interface.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use deduplication::{DeduplicationDataInterface, RawXorbData}; use mdb_shard::file_structs::FileDataSequenceEntry; use merklehash::MerkleHash; -use progress_tracking::upload_tracking::{CompletionTrackerFileId, FileXorbDependency}; +use progress_tracking::upload_tracking::FileXorbDependency; use tokio::task::JoinSet; use tracing::Instrument; @@ -13,15 +13,13 @@ use crate::errors::Result; use crate::file_upload_session::FileUploadSession; pub struct UploadSessionDataManager { - file_id: CompletionTrackerFileId, session: Arc, active_global_dedup_queries: JoinSet>, } impl UploadSessionDataManager { - pub fn new(session: Arc, file_id: CompletionTrackerFileId) -> Self { + pub fn new(session: Arc) -> Self { Self { - file_id, session, active_global_dedup_queries: Default::default(), } diff --git a/data/src/file_cleaner.rs b/data/src/file_cleaner.rs index fb50d62e..8d570599 100644 --- a/data/src/file_cleaner.rs +++ b/data/src/file_cleaner.rs @@ -19,9 +19,6 @@ use crate::sha256::ShaGenerator; /// A class that encapsulates the clean and data task around a single file. pub struct SingleFileCleaner { - // The id for completion tracking - file_id: CompletionTrackerFileId, - // File name, if known. file_name: Option>, @@ -50,11 +47,10 @@ impl SingleFileCleaner { sha256: Option, session: Arc, ) -> Self { - let deduper = FileDeduper::new(UploadSessionDataManager::new(session.clone(), file_id), file_id); + let deduper = FileDeduper::new(UploadSessionDataManager::new(session.clone()), file_id); Self { file_name, - file_id, dedup_manager_fut: Box::pin(async move { Ok(deduper) }), session, chunker: deduplication::Chunker::default(), diff --git a/data/src/file_downloader.rs b/data/src/file_downloader.rs index 71d203b2..01f8a348 100644 --- a/data/src/file_downloader.rs +++ b/data/src/file_downloader.rs @@ -19,8 +19,6 @@ use crate::remote_client_interface::create_remote_client; /// that succeeds or fails as a unit; i.e., all files get uploaded on finalization, and all shards /// and xorbs needed to reconstruct those files are properly uploaded and registered. pub struct FileDownloader { - /* ----- Configurations ----- */ - config: Arc, client: Arc, } @@ -34,7 +32,7 @@ impl FileDownloader { .unwrap_or_else(|| Cow::Owned(Ulid::new().to_string())); let client = create_remote_client(&config, &session_id, false)?; - Ok(Self { config, client }) + Ok(Self { client }) } #[instrument(skip_all, name = "FileDownloader::smudge_file_from_hash", fields(hash=file_id.hex() diff --git a/data/src/file_upload_session.rs b/data/src/file_upload_session.rs index f73d2c0a..c51d1f5a 100644 --- a/data/src/file_upload_session.rs +++ b/data/src/file_upload_session.rs @@ -10,7 +10,6 @@ use cas_client::Client; use cas_object::SerializedCasObject; use deduplication::constants::{MAX_XORB_BYTES, MAX_XORB_CHUNKS}; use deduplication::{DataAggregator, DeduplicationMetrics, RawXorbData}; -use jsonwebtoken::{DecodingKey, Validation, decode}; use lazy_static::lazy_static; use mdb_shard::Sha256; use mdb_shard::file_structs::MDBFileInfo; @@ -49,9 +48,6 @@ pub struct FileUploadSession { pub(crate) client: Arc, pub(crate) shard_interface: SessionShardInterface, - /// The repo id, if present. - pub(crate) repo_id: Option, - /// The configuration settings, if needed. pub(crate) config: Arc, @@ -135,27 +131,9 @@ impl FileUploadSession { let shard_interface = SessionShardInterface::new(config.clone(), client.clone(), dry_run).await?; - let repo_id = config.data_config.auth.clone().and_then(|auth| { - let token = auth.token; - let mut validation = Validation::default(); - validation.insecure_disable_signature_validation(); - - decode::>( - &token, - &DecodingKey::from_secret("".as_ref()), // Secret is not used here - &validation, - ) - .ok() - .and_then(|decoded| { - // Extract `repo_id` from the claims map - decoded.claims.get("repoId").and_then(|value| value.as_str().map(String::from)) - }) - }); - Ok(Arc::new(Self { shard_interface, client, - repo_id, config, completion_tracker, progress_aggregator, diff --git a/data/src/lib.rs b/data/src/lib.rs index 8045291d..615923e9 100644 --- a/data/src/lib.rs +++ b/data/src/lib.rs @@ -1,4 +1,3 @@ -#![allow(dead_code)] pub mod configurations; pub mod data_client; mod deduplication_interface; diff --git a/data/src/remote_client_interface.rs b/data/src/remote_client_interface.rs index c9af7a6e..8d50711b 100644 --- a/data/src/remote_client_interface.rs +++ b/data/src/remote_client_interface.rs @@ -18,7 +18,6 @@ pub(crate) fn create_remote_client( endpoint, &cas_storage_config.auth, &Some(cas_storage_config.cache_config.clone()), - Some(config.shard_config.cache_directory.clone()), session_id, dry_run, &cas_storage_config.user_agent, diff --git a/hf_xet/Cargo.lock b/hf_xet/Cargo.lock index 7089f4ff..7fd0fb54 100644 --- a/hf_xet/Cargo.lock +++ b/hf_xet/Cargo.lock @@ -778,7 +778,6 @@ dependencies = [ "deduplication", "error_printer", "hub_client", - "jsonwebtoken", "lazy_static", "mdb_shard", "merklehash", @@ -1903,21 +1902,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "jsonwebtoken" -version = "9.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" -dependencies = [ - "base64 0.22.1", - "js-sys", - "pem", - "ring", - "serde", - "serde_json", - "simple_asn1", -] - [[package]] name = "konst" version = "0.3.16" @@ -2310,16 +2294,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - [[package]] name = "num-complex" version = "0.4.6" @@ -2550,16 +2524,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "pem" -version = "3.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38af38e8470ac9dee3ce1bae1af9c1671fffc44ddfd8bd1d0a3445bf349a8ef3" -dependencies = [ - "base64 0.22.1", - "serde", -] - [[package]] name = "percent-encoding" version = "2.3.1" @@ -3551,18 +3515,6 @@ dependencies = [ "wide", ] -[[package]] -name = "simple_asn1" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" -dependencies = [ - "num-bigint", - "num-traits", - "thiserror 2.0.15", - "time", -] - [[package]] name = "slab" version = "0.4.11" diff --git a/hf_xet_wasm/src/wasm_file_upload_session.rs b/hf_xet_wasm/src/wasm_file_upload_session.rs index 11b3376b..c547416c 100644 --- a/hf_xet_wasm/src/wasm_file_upload_session.rs +++ b/hf_xet_wasm/src/wasm_file_upload_session.rs @@ -42,7 +42,6 @@ impl FileUploadSession { &config.data_config.endpoint, &config.data_config.auth, &None, - None, &config.session_id, false, &config.data_config.user_agent,