Clean up dead code (#593)

A lot of dead code has accumulated in xet-core because of
`#![allow(dead_code)]` in a couple of places. This PR removes it and
fixes the corresponding linting errors. No functionality change.
This commit is contained in:
Di Xiao
2025-12-12 02:55:28 +08:00
committed by GitHub
parent 23f691b26d
commit 74d7c5926c
20 changed files with 19 additions and 147 deletions

38
Cargo.lock generated
View File

@@ -1258,7 +1258,6 @@ dependencies = [
"dirs",
"error_printer",
"hub_client",
"jsonwebtoken",
"lazy_static",
"mdb_shard",
"merklehash",
@@ -2751,21 +2750,6 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "jsonwebtoken"
version = "9.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde"
dependencies = [
"base64 0.22.1",
"js-sys",
"pem",
"ring",
"serde",
"serde_json",
"simple_asn1",
]
[[package]]
name = "konst"
version = "0.3.16"
@@ -3666,16 +3650,6 @@ dependencies = [
"hmac",
]
[[package]]
name = "pem"
version = "3.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38af38e8470ac9dee3ce1bae1af9c1671fffc44ddfd8bd1d0a3445bf349a8ef3"
dependencies = [
"base64 0.22.1",
"serde",
]
[[package]]
name = "pem-rfc7468"
version = "0.7.0"
@@ -4978,18 +4952,6 @@ version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
[[package]]
name = "simple_asn1"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb"
dependencies = [
"num-bigint",
"num-traits",
"thiserror 2.0.12",
"time",
]
[[package]]
name = "siphasher"
version = "1.0.1"

View File

@@ -64,7 +64,6 @@ http = "1"
hyper = "1.7"
hyper-util = "0.1"
itertools = "0.14"
jsonwebtoken = "9.3"
lazy_static = "1.5"
libc = "0.2"
lz4_flex = "0.11"

View File

@@ -314,7 +314,7 @@ impl AdaptiveConcurrencyController {
}
pub async fn acquire_connection_permit(self: &Arc<Self>) -> Result<ConnectionPermit, CasClientError> {
let permit = self.concurrency_semaphore.acquire().await?;
let _permit = self.concurrency_semaphore.acquire().await?;
let info = Arc::new(ConnectionPermitInfo {
controller: Arc::clone(self),
@@ -325,7 +325,7 @@ impl AdaptiveConcurrencyController {
last_partial_report_ms: AtomicU64::new(0),
});
Ok(ConnectionPermit { permit, info })
Ok(ConnectionPermit { _permit, info })
}
/// The current concurrency; there may be more permits out there due to the lazy resolution of decrements, but those
@@ -547,7 +547,7 @@ pub struct ConnectionPermitInfo {
/// A permit for a connection. This can be used to track the start time of a transfer and report back
/// to the original controller whether it's needed.
pub struct ConnectionPermit {
permit: AdjustableSemaphorePermit,
_permit: AdjustableSemaphorePermit,
info: Arc<ConnectionPermitInfo>,
}
@@ -661,7 +661,6 @@ mod test_constants {
pub const INCR_SPACING_MS: u64 = 200;
pub const DECR_SPACING_MS: u64 = 100;
pub const TARGET_TIME_MS_S: u64 = 5;
pub const TARGET_TIME_MS_L: u64 = 20;
pub const LARGE_N_BYTES: u64 = 10000;
@@ -708,7 +707,6 @@ mod tests {
use super::test_constants::*;
use super::*;
pub const B: u64 = 1000;
// Use a larger transfer size for tests to ensure the RTT predictor has enough data
pub const TEST_TRANSFER_SIZE: u64 = 10 * 1024 * 1024; // 10MB

View File

@@ -111,6 +111,7 @@ impl ExpWeightedOnlineLinearRegression {
}
/// Optionally: expose current coefficients (beta0, beta1) if desired.
#[allow(dead_code)]
pub fn coefficients(&self) -> Option<(f64, f64)> {
let delta = self.sw * self.sxx - self.sx * self.sx;
if delta.abs() < 1e-12 {
@@ -125,6 +126,7 @@ impl ExpWeightedOnlineLinearRegression {
/// Check if two models are approximately equal for testing purposes.
///
/// Compares all internal state (sufficient statistics) with a tolerance.
#[cfg(test)]
pub fn approx_equals(&self, other: &Self, epsilon: f64) -> bool {
(self.sw - other.sw).abs() < epsilon
&& (self.sx - other.sx).abs() < epsilon

View File

@@ -106,6 +106,7 @@ impl RTTPredictor {
///
/// - `size_bytes`: the size of the transfer.
/// - `avg_concurrent`: the number of concurrent connections.
#[cfg(test)]
pub fn prediction_standard_error(&self, size_bytes: u64, avg_concurrent: f64) -> Option<f64> {
self.predict(size_bytes, avg_concurrent).1
}

View File

@@ -418,7 +418,7 @@ impl DownloadSegmentLengthTuner {
if metrics.n_retries_on_403 > 0 {
if *num_range_in_segment > 1 {
let delta = xet_config().client.num_range_in_segment_delta.min(*num_range_in_segment - 1);
let delta = self.delta.min(*num_range_in_segment - 1);
info!("detected retries on 403, shrinking segment size by {delta} ranges");
*num_range_in_segment -= delta;
} else {

View File

@@ -1,5 +1,3 @@
#![allow(dead_code)]
pub use chunk_cache::CacheConfig;
pub use http_client::{Api, ResponseErrorLogger, RetryConfig, build_auth_http_client, build_http_client};
pub use interface::Client;
@@ -11,6 +9,7 @@ pub use remote_client::RemoteClient;
pub use crate::error::CasClientError;
pub mod adaptive_concurrency;
#[cfg(not(target_family = "wasm"))]
mod download_utils;
mod error;
@@ -24,5 +23,3 @@ mod output_provider;
pub mod remote_client;
pub mod retry_wrapper;
pub mod upload_progress_stream;
pub mod adaptive_concurrency;

View File

@@ -28,7 +28,6 @@ use crate::{Client, SeekingOutputProvider, SequentialOutput};
pub struct LocalClient {
tmp_dir: Option<TempDir>, // To hold directory to use for local testing
base_dir: PathBuf,
xorb_dir: PathBuf,
shard_dir: PathBuf,
shard_manager: Arc<ShardFileManager>,
@@ -88,7 +87,6 @@ impl LocalClient {
Ok(Self {
tmp_dir: None,
base_dir,
shard_dir,
xorb_dir,
shard_manager,
@@ -199,6 +197,7 @@ impl LocalClient {
Ok(ret)
}
#[cfg(test)]
fn get_length(&self, hash: &MerkleHash) -> Result<u32> {
let file_path = self.get_path_for_entry(hash);
match File::open(file_path) {

View File

@@ -1,6 +1,5 @@
use std::collections::HashMap;
use std::mem::take;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
@@ -61,7 +60,6 @@ pub struct RemoteClient {
chunk_cache: Option<Arc<dyn ChunkCache>>,
#[cfg(not(target_family = "wasm"))]
range_download_single_flight: RangeDownloadSingleFlight,
shard_cache_directory: Option<PathBuf>,
upload_concurrency_controller: Arc<AdaptiveConcurrencyController>,
}
@@ -191,7 +189,6 @@ impl RemoteClient {
endpoint: &str,
auth: &Option<AuthConfig>,
cache_config: &Option<CacheConfig>,
shard_cache_directory: Option<PathBuf>,
session_id: &str,
dry_run: bool,
user_agent: &str,
@@ -226,7 +223,6 @@ impl RemoteClient {
chunk_cache,
#[cfg(not(target_family = "wasm"))]
range_download_single_flight: Arc::new(Group::new()),
shard_cache_directory,
upload_concurrency_controller: AdaptiveConcurrencyController::new_upload("upload"),
}
}
@@ -281,6 +277,7 @@ impl RemoteClient {
#[cfg(not(target_family = "wasm"))]
impl RemoteClient {
#[instrument(skip_all, name = "RemoteClient::batch_get_reconstruction")]
#[allow(dead_code)]
async fn batch_get_reconstruction(
&self,
file_ids: impl Iterator<Item = &MerkleHash>,
@@ -310,7 +307,8 @@ impl RemoteClient {
.run_and_extract_json(move |_partial_report_fn| client.get(url.clone()).with_extension(Api(api_tag)).send())
.await?;
info!(call_id,
info!(
call_id,
file_ids=?file_id_list,
response_count=response.files.len(),
"Completed batch_get_reconstruction API call",
@@ -942,7 +940,7 @@ mod tests {
let raw_xorb = build_raw_xorb(3, ChunkSize::Random(512, 10248));
let threadpool = XetRuntime::new().unwrap();
let client = RemoteClient::new(CAS_ENDPOINT, &None, &None, None, "", false, "");
let client = RemoteClient::new(CAS_ENDPOINT, &None, &None, "", false, "");
let cas_object = build_and_verify_cas_object(raw_xorb, Some(CompressionScheme::LZ4));
@@ -1325,7 +1323,7 @@ mod tests {
// test reconstruct and sequential write
let test = test_case.clone();
let client = RemoteClient::new(endpoint, &None, &None, None, "", false, "");
let client = RemoteClient::new(endpoint, &None, &None, "", false, "");
let buf = ThreadSafeBuffer::default();
let provider = SequentialOutput::from(buf.clone());
let resp = threadpool.external_run_async_task(async move {
@@ -1347,7 +1345,7 @@ mod tests {
// test reconstruct and parallel write
let test = test_case;
let client = RemoteClient::new(endpoint, &None, &None, None, "", false, "");
let client = RemoteClient::new(endpoint, &None, &None, "", false, "");
let buf = ThreadSafeBuffer::default();
let provider = SeekingOutputProvider::from(buf.clone());
let resp = threadpool.external_run_async_task(async move {

View File

@@ -1,4 +1,3 @@
#![allow(dead_code)]
// Re-export the NetworkModelState from the adaptive_concurrency module
pub use cas_client::adaptive_concurrency::{CCLatencyModelState, CCSuccessModelState};
use serde::{Deserialize, Serialize};

View File

@@ -45,7 +45,6 @@ async-trait = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true }
jsonwebtoken = { workspace = true }
lazy_static = { workspace = true }
more-asserts = { workspace = true }
prometheus = { workspace = true }

View File

@@ -218,7 +218,6 @@ async fn query_reconstruction(
&jwt_info.cas_url,
&cas_storage_config.auth,
&Some(cas_storage_config.cache_config.clone()),
Some(config.shard_config.cache_directory.clone()),
"",
true,
&cas_storage_config.user_agent,

View File

@@ -4,7 +4,7 @@ use async_trait::async_trait;
use deduplication::{DeduplicationDataInterface, RawXorbData};
use mdb_shard::file_structs::FileDataSequenceEntry;
use merklehash::MerkleHash;
use progress_tracking::upload_tracking::{CompletionTrackerFileId, FileXorbDependency};
use progress_tracking::upload_tracking::FileXorbDependency;
use tokio::task::JoinSet;
use tracing::Instrument;
@@ -13,15 +13,13 @@ use crate::errors::Result;
use crate::file_upload_session::FileUploadSession;
pub struct UploadSessionDataManager {
file_id: CompletionTrackerFileId,
session: Arc<FileUploadSession>,
active_global_dedup_queries: JoinSet<Result<bool>>,
}
impl UploadSessionDataManager {
pub fn new(session: Arc<FileUploadSession>, file_id: CompletionTrackerFileId) -> Self {
pub fn new(session: Arc<FileUploadSession>) -> Self {
Self {
file_id,
session,
active_global_dedup_queries: Default::default(),
}

View File

@@ -19,9 +19,6 @@ use crate::sha256::ShaGenerator;
/// A class that encapsulates the clean and data task around a single file.
pub struct SingleFileCleaner {
// The id for completion tracking
file_id: CompletionTrackerFileId,
// File name, if known.
file_name: Option<Arc<str>>,
@@ -50,11 +47,10 @@ impl SingleFileCleaner {
sha256: Option<Sha256>,
session: Arc<FileUploadSession>,
) -> Self {
let deduper = FileDeduper::new(UploadSessionDataManager::new(session.clone(), file_id), file_id);
let deduper = FileDeduper::new(UploadSessionDataManager::new(session.clone()), file_id);
Self {
file_name,
file_id,
dedup_manager_fut: Box::pin(async move { Ok(deduper) }),
session,
chunker: deduplication::Chunker::default(),

View File

@@ -19,8 +19,6 @@ use crate::remote_client_interface::create_remote_client;
/// that succeeds or fails as a unit; i.e., all files get uploaded on finalization, and all shards
/// and xorbs needed to reconstruct those files are properly uploaded and registered.
pub struct FileDownloader {
/* ----- Configurations ----- */
config: Arc<TranslatorConfig>,
client: Arc<dyn Client + Send + Sync>,
}
@@ -34,7 +32,7 @@ impl FileDownloader {
.unwrap_or_else(|| Cow::Owned(Ulid::new().to_string()));
let client = create_remote_client(&config, &session_id, false)?;
Ok(Self { config, client })
Ok(Self { client })
}
#[instrument(skip_all, name = "FileDownloader::smudge_file_from_hash", fields(hash=file_id.hex()

View File

@@ -10,7 +10,6 @@ use cas_client::Client;
use cas_object::SerializedCasObject;
use deduplication::constants::{MAX_XORB_BYTES, MAX_XORB_CHUNKS};
use deduplication::{DataAggregator, DeduplicationMetrics, RawXorbData};
use jsonwebtoken::{DecodingKey, Validation, decode};
use lazy_static::lazy_static;
use mdb_shard::Sha256;
use mdb_shard::file_structs::MDBFileInfo;
@@ -49,9 +48,6 @@ pub struct FileUploadSession {
pub(crate) client: Arc<dyn Client + Send + Sync>,
pub(crate) shard_interface: SessionShardInterface,
/// The repo id, if present.
pub(crate) repo_id: Option<String>,
/// The configuration settings, if needed.
pub(crate) config: Arc<TranslatorConfig>,
@@ -135,27 +131,9 @@ impl FileUploadSession {
let shard_interface = SessionShardInterface::new(config.clone(), client.clone(), dry_run).await?;
let repo_id = config.data_config.auth.clone().and_then(|auth| {
let token = auth.token;
let mut validation = Validation::default();
validation.insecure_disable_signature_validation();
decode::<serde_json::Map<String, serde_json::Value>>(
&token,
&DecodingKey::from_secret("".as_ref()), // Secret is not used here
&validation,
)
.ok()
.and_then(|decoded| {
// Extract `repo_id` from the claims map
decoded.claims.get("repoId").and_then(|value| value.as_str().map(String::from))
})
});
Ok(Arc::new(Self {
shard_interface,
client,
repo_id,
config,
completion_tracker,
progress_aggregator,

View File

@@ -1,4 +1,3 @@
#![allow(dead_code)]
pub mod configurations;
pub mod data_client;
mod deduplication_interface;

View File

@@ -18,7 +18,6 @@ pub(crate) fn create_remote_client(
endpoint,
&cas_storage_config.auth,
&Some(cas_storage_config.cache_config.clone()),
Some(config.shard_config.cache_directory.clone()),
session_id,
dry_run,
&cas_storage_config.user_agent,

48
hf_xet/Cargo.lock generated
View File

@@ -778,7 +778,6 @@ dependencies = [
"deduplication",
"error_printer",
"hub_client",
"jsonwebtoken",
"lazy_static",
"mdb_shard",
"merklehash",
@@ -1903,21 +1902,6 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "jsonwebtoken"
version = "9.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde"
dependencies = [
"base64 0.22.1",
"js-sys",
"pem",
"ring",
"serde",
"serde_json",
"simple_asn1",
]
[[package]]
name = "konst"
version = "0.3.16"
@@ -2310,16 +2294,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "num-bigint"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
dependencies = [
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.6"
@@ -2550,16 +2524,6 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "pem"
version = "3.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38af38e8470ac9dee3ce1bae1af9c1671fffc44ddfd8bd1d0a3445bf349a8ef3"
dependencies = [
"base64 0.22.1",
"serde",
]
[[package]]
name = "percent-encoding"
version = "2.3.1"
@@ -3551,18 +3515,6 @@ dependencies = [
"wide",
]
[[package]]
name = "simple_asn1"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb"
dependencies = [
"num-bigint",
"num-traits",
"thiserror 2.0.15",
"time",
]
[[package]]
name = "slab"
version = "0.4.11"

View File

@@ -42,7 +42,6 @@ impl FileUploadSession {
&config.data_config.endpoint,
&config.data_config.auth,
&None,
None,
&config.session_id,
false,
&config.data_config.user_agent,