Integration test for ssh access on Windows (#566)

This PR builds on top of
https://github.com/huggingface/xet-core/pull/565 and builds an
integration test to test access to "ssh" and "sh" on Windows through the
"git" (-> "git-lfs") -> "git-xet" call chain.

Out of all the ssh variants, access to programs like "plink", "putty",
"tortoiseplink" or "simple" should be given by the env var
`$GIT_SSH_COMMAND` or `$GIT_SSH`, or by git config entry
`core.sshCommand`. Direct access to the most commonly used utility "ssh" and
indirect access to "ssh" via "sh -c" on Windows are provided by the
"git" (-> "git-lfs") -> "git-xet" call chain, see
git_xet/tests/test_ssh.rs for details.
This commit is contained in:
Di Xiao
2025-11-20 19:22:19 +08:00
committed by GitHub
parent 075a9c96c0
commit 5f77ffc46a
11 changed files with 1562 additions and 35 deletions

View File

@@ -46,7 +46,7 @@ jobs:
git lfs install
- name: Build and Test
run: |
cargo test --verbose --no-fail-fast --features "strict"
cargo test --verbose --no-fail-fast --features "strict git-xet-for-integration-test"
- name: Check Cargo.lock has no uncommitted changes
run: |
# the build and test steps would update Cargo.lock if it is out of date
@@ -63,7 +63,7 @@ jobs:
- uses: ./.github/actions/cache-rust-build
- name: Build and Test
run: |
cargo test --verbose --no-fail-fast --features "strict"
cargo test --verbose --no-fail-fast --features "strict git-xet-for-integration-test"
build_and_test-macos:
runs-on: macos-latest
steps:
@@ -80,7 +80,7 @@ jobs:
- uses: ./.github/actions/cache-rust-build
- name: Build and Test
run: |
cargo test --verbose --no-fail-fast --features "strict"
cargo test --verbose --no-fail-fast --features "strict git-xet-for-integration-test"
build_and_test-wasm:
name: Build WASM
runs-on: ubuntu-latest

1207
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -126,6 +126,8 @@ winapi = { version = "0.3", features = [
# dev-deps
criterion = { version = "0.5", features = ["html_reports"] }
httpmock = "0.7"
rand_core = "0.6"
russh = "0.54"
serial_test = "3"
tempdir = "0.3"
tracing-test = { version = "0.2", features = ["no-env-filter"] }

View File

@@ -21,8 +21,10 @@ clap = { workspace = true }
derivative = { workspace = true }
git-url-parse = { workspace = true }
git2 = { workspace = true }
rand_core = { workspace = true }
reqwest = { workspace = true }
reqwest-middleware = { workspace = true }
russh = { workspace = true }
rust-netrc = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
@@ -35,4 +37,7 @@ shell-words = { workspace = true }
openssh = { workspace = true }
[dev-dependencies]
serial_test = { workspace = true }
serial_test = { workspace = true }
[features]
git-xet-for-integration-test = []

View File

@@ -42,6 +42,11 @@ Remove "lfs.concurrenttransfers" from the global Git config."#)]
/// Start tracking the given patterns(s) through Git LFS. This directly
/// calls the "git lfs track" command with the following options and args.
Track(TrackArg),
/// Run any arguments passed in as a command. This is a feature only for
/// integration tests.
#[cfg(feature = "git-xet-for-integration-test")]
RunAny(RunAnyArg),
}
#[derive(Args, Debug)]
@@ -95,6 +100,13 @@ struct TrackArg {
args_to_git_lfs_track: Vec<String>,
}
// Arguments of the `RunAny` subcommand, which only exists when the
// `git-xet-for-integration-test` feature is enabled.
// NOTE(review): plain `//` comments are used here on purpose; presumably `///` comments
// would be picked up by the clap derive as help text — confirm before converting.
#[derive(Args, Debug)]
#[cfg(feature = "git-xet-for-integration-test")]
struct RunAnyArg {
// Program that `run_any_command` hands to `std::process::Command::new`.
program: String,
// Optional arguments forwarded to `program`; when absent, the program is run without arguments.
args: Option<Vec<String>>,
}
#[derive(Args, Debug)]
struct CliOverrides {
/// Increase verbosity of output (-v, -vv, etc.)
@@ -140,6 +152,8 @@ impl Command {
Command::Uninstall(args) => uninstall_command(args),
Command::Transfer => transfer_command().await,
Command::Track(args) => track_command(args),
#[cfg(feature = "git-xet-for-integration-test")]
Command::RunAny(args) => run_any_command(args),
}
}
@@ -149,6 +163,8 @@ impl Command {
Command::Uninstall(_) => "uninstall",
Command::Transfer => "transfer",
Command::Track(_) => "track",
#[cfg(feature = "git-xet-for-integration-test")]
Command::RunAny(_) => "runany",
}
}
}
@@ -221,3 +237,13 @@ fn track_command(args: TrackArg) -> Result<()> {
cmd.status()?;
Ok(())
}
/// Runs an arbitrary program with the given arguments and waits for it to finish.
///
/// Only compiled with the `git-xet-for-integration-test` feature. An error is returned
/// when the program cannot be started; the exit status of a program that did start is
/// intentionally not inspected, so a non-zero child exit still yields `Ok(())`.
#[cfg(feature = "git-xet-for-integration-test")]
fn run_any_command(args: RunAnyArg) -> Result<()> {
    let RunAnyArg {
        program,
        args: forwarded,
    } = args;

    let mut child = std::process::Command::new(program);
    // A missing argument list is the same as an empty one.
    child.args(forwarded.unwrap_or_default());
    child.status()?;
    Ok(())
}

View File

@@ -15,6 +15,7 @@ mod ssh;
use git::GitCredentialHelper;
#[cfg(unix)]
use ssh::SSHCredentialHelper;
pub use ssh::{GitLFSAuthentationResponseHeader, GitLFSAuthenticateResponse};
// This mod derives credentials for the Xet CAS token API on HF Hub from the local repository's credentials.
// Unlike the authorization model in huggingface_hub which adheres to using a HF token, Git and Git LFS have

View File

@@ -6,24 +6,24 @@ use hub_client::{CredentialHelper, HubClientError, Operation, Result};
use openssh::{KnownHosts, Session};
use reqwest::header;
use reqwest_middleware::RequestBuilder;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use crate::git_url::GitUrl;
#[derive(Deserialize)]
struct GitLFSAuthentationResponseHeader {
#[derive(Deserialize, Serialize)]
pub struct GitLFSAuthentationResponseHeader {
#[serde(rename = "Authorization")]
authorization: String,
pub authorization: String,
}
// This struct represents the JSON format of the `git-lfs-authenticate` command response over an
// SSH channel to the remote Git server. For details see `crate::auth.rs`.
#[derive(Deserialize)]
#[derive(Deserialize, Serialize)]
#[allow(unused)]
struct GitLFSAuthenticateResponse {
header: GitLFSAuthentationResponseHeader,
href: String,
expires_in: u32,
pub struct GitLFSAuthenticateResponse {
pub header: GitLFSAuthentationResponseHeader,
pub href: String,
pub expires_in: u32,
}
// This credential helper calls a remote command `git-lfs-authenticate` over an SSH channel

View File

@@ -5,6 +5,6 @@ mod errors;
mod git_repo;
mod git_url;
mod lfs_agent_protocol;
mod test_utils;
pub mod test_utils;
mod token_refresher;
mod utils;

View File

@@ -1,6 +1,8 @@
mod ssh_server;
mod temp_home;
mod test_repo;
pub use ssh_server::{GitLFSAuthenticateResponse, start_local_ssh_server};
#[cfg(test)]
pub use temp_home::TempHome;
#[cfg(test)]

View File

@@ -0,0 +1,191 @@
use std::io;
use std::sync::Arc;
use anyhow::anyhow;
use rand_core::OsRng;
use russh::keys::{Certificate, *};
use russh::server::{Msg, Server as _, Session};
use russh::*;
use tokio::net::TcpListener;
use tokio::task::JoinHandle;
pub use crate::auth::{GitLFSAuthentationResponseHeader, GitLFSAuthenticateResponse};
/// Starts a lightweight SSH server intended for tests and local/manual debugging.
///
/// The server:
/// - binds to the given port on all interfaces (`0.0.0.0`); use `Some(0)` or `None` to let the OS pick a free port,
/// - accepts every client: `none`, public-key and OpenSSH certificate authentication all succeed, so it must never be
///   exposed outside of a test environment,
/// - handles `exec` requests and specifically responds to the `git-lfs-authenticate <repo> <operation>` command with a
///   small JSON payload and then closes the channel,
/// - runs on the tokio runtime and returns a JoinHandle for the spawned server task so the caller can abort or await it
///   when finished.
///
/// Arguments:
/// - `port`: Option<u16> — Some(port) to bind to that port, None (or Some(0)) to bind to an OS-assigned port.
///
/// Returns:
/// - io::Result<(u16, JoinHandle<io::Result<()>>)> where the first element is the actual port the listener is bound to
///   and the second is a handle to the background task running the server.
///
/// Errors:
/// - Returns an `io::Error` when the host key cannot be generated, when the listener cannot be bound (for example the
///   port is already in use), or when the bound address cannot be queried.
///
/// Example (async context):
/// ```ignore
///
/// // Start server on any free port
/// let (port, server_task) = start_local_ssh_server(None).await?;
/// println!("Test SSH server listening on port {}", port);
///
/// // ... run test client actions against localhost:port ...
///
/// // Stop the server: abort the background task and optionally await it.
/// server_task.abort();
/// let _ = server_task.await;
/// ```
pub async fn start_local_ssh_server(port: Option<u16>) -> io::Result<(u16, JoinHandle<io::Result<()>>)> {
    // A fresh random Ed25519 host key is generated for every server instance.
    let host_key = russh::keys::PrivateKey::random(&mut OsRng, russh::keys::Algorithm::Ed25519)
        .map_err(|e| io::Error::other(e.to_string()))?;

    let config = Arc::new(russh::server::Config {
        inactivity_timeout: Some(std::time::Duration::from_secs(3600)),
        auth_rejection_time: std::time::Duration::from_secs(3),
        auth_rejection_time_initial: Some(std::time::Duration::from_secs(0)),
        keys: vec![host_key],
        preferred: Preferred::default(),
        ..Default::default()
    });

    // Bind before spawning so the actual port is known to the caller, and report bind
    // failures through the returned `io::Result` instead of panicking.
    let socket = TcpListener::bind(("0.0.0.0", port.unwrap_or_default())).await?;
    let port = socket.local_addr()?.port();

    let mut server = ServerImpl;
    let task = tokio::spawn(async move { server.run_on_socket(config, &socket).await });

    Ok((port, task))
}
/// Stateless type backing the local test SSH server. It implements both
/// `server::Server` (handing out a handler per client) and `server::Handler`
/// (answering the requests of one client), so each client gets a clone of it.
#[derive(Clone)]
struct ServerImpl;
impl server::Server for ServerImpl {
type Handler = Self;
fn new_client(&mut self, _: Option<std::net::SocketAddr>) -> Self {
self.clone()
}
fn handle_session_error(&mut self, _error: <Self::Handler as russh::server::Handler>::Error) {
eprintln!("Session error: {_error:#?}");
}
}
impl server::Handler for ServerImpl {
type Error = russh::Error;
async fn channel_open_session(
&mut self,
_channel: Channel<Msg>,
_session: &mut Session,
) -> Result<bool, Self::Error> {
Ok(true)
}
async fn pty_request(
&mut self,
channel: ChannelId,
_term: &str,
_col_width: u32,
_row_height: u32,
_pix_width: u32,
_pix_height: u32,
_modes: &[(Pty, u32)],
session: &mut Session,
) -> Result<(), Self::Error> {
session.channel_failure(channel)?;
session.close(channel)?;
Ok(())
}
async fn shell_request(&mut self, channel: ChannelId, session: &mut Session) -> Result<(), Self::Error> {
session.channel_failure(channel)?;
session.close(channel)?;
Ok(())
}
async fn auth_none(&mut self, _: &str) -> Result<server::Auth, Self::Error> {
Ok(server::Auth::Accept)
}
async fn auth_publickey(&mut self, _: &str, _key: &ssh_key::PublicKey) -> Result<server::Auth, Self::Error> {
Ok(server::Auth::Accept)
}
async fn auth_openssh_certificate(
&mut self,
_user: &str,
_certificate: &Certificate,
) -> Result<server::Auth, Self::Error> {
Ok(server::Auth::Accept)
}
async fn exec_request(
&mut self,
channel: ChannelId,
data: &[u8],
session: &mut Session,
) -> Result<(), Self::Error> {
let request = String::from_utf8_lossy(data);
let request: Vec<_> = request.split_ascii_whitespace().collect();
let response = if let Some(command) = request.first() {
match *command {
"git-lfs-authenticate" => self.git_lfs_authenticate(request).unwrap_or_else(|e| e.to_string()),
_ => "invalid command".into(),
}
} else {
"invalid request".into()
};
let data = CryptoVec::from(response);
session.data(channel, data)?;
session.close(channel)?;
Ok(())
}
}
impl ServerImpl {
    /// Builds the canned JSON reply for `git-lfs-authenticate <repo> <operation>`.
    ///
    /// `request` is the whitespace-split command line, with the command name at index 0,
    /// the repo id at index 1 and the operation at index 2. The operation must be
    /// `upload` or `download`. Returns the serialized `GitLFSAuthenticateResponse`, or an
    /// error describing the first problem found in the request.
    fn git_lfs_authenticate(&self, request: Vec<&str>) -> anyhow::Result<String> {
        let repo_id = request
            .get(1)
            .ok_or_else(|| anyhow!("invalid request, missing repo id"))?;
        let operation = request
            .get(2)
            .ok_or_else(|| anyhow!("invalid request, missing operation"))?;

        if *operation != "upload" && *operation != "download" {
            return Err(anyhow!("invalid request, unrecognized operation"));
        }

        // Fixed dummy credential and lifetime; only the href depends on the request.
        let header = GitLFSAuthentationResponseHeader {
            authorization: "Basic 38vcn391nv==".into(),
        };
        let response = GitLFSAuthenticateResponse {
            header,
            href: format!("https://huggingface.co/{repo_id}.git/info/lfs"),
            expires_in: 3600,
        };

        Ok(serde_json::to_string(&response)?)
    }
}
#[cfg(test)]
mod tests {
    use super::start_local_ssh_server;

    /// Not a real test: keeps a server running on port 2222 until the server task ends,
    /// so that it can be probed by hand. Ignored unless requested explicitly.
    #[tokio::test]
    #[ignore = "start an ssh server for manual testing"]
    async fn run_server() {
        let (_, server_handle) = start_local_ssh_server(Some(2222)).await.unwrap();
        // The outcome of the server task is irrelevant for a manual session.
        let _ = server_handle.await;
    }
}

135
git_xet/tests/test_ssh.rs Normal file
View File

@@ -0,0 +1,135 @@
//! Integration tests for verifying access to POSIX utility commands when running programs
//! through `git-xet` (invoked via `git-lfs` and `git`). These tests focus on the behaviour on
//! Windows where "Git for Windows" ships a MinGW/MSYS environment containing common POSIX
//! utilities (`ssh`, `sh`, `chmod`, `uname`, etc.) and augments the `PATH` of the `git`
//! process with the directories that contain those utilities.
//!
//! Rationale
//! - When `git` is executed on Windows it adds the MinGW/MSYS directories to the `PATH` so child processes can find
//! POSIX tools bundled with Git for Windows. When `git-xet` is invoked by the git-lfs filter process (the filter
//! itself is started by `git`), it ultimately runs as a descendant of the `git` process and therefore inherits the
//! augmented environment. These tests ensure that `git-xet` (launched via the `git` invocation chain) can locate and
//! execute those POSIX utilities as expected. This is the same mechanism used by git-lfs on Windows to access the
//! "ssh" utility.
//!
//! What is tested
//! - test_access_posix_commands: runs a set of simple POSIX commands through `git-xet` and asserts that the commands
//! execute successfully (exit code 0) and emit output matching expected substrings.
//!
//! - test_ssh_connect_through_ssh_cmd and test_ssh_connect_through_sh_cmd: These tests start a local SSH server and
//! then attempt to run `ssh` to that server through `git-xet`. They validate that invoking `ssh` directly or
//! indirectly via `sh -c "ssh ..."` results in the expected JSON response from the server, proving that `ssh` is
//! callable and functional when executed from within the `git-xet` invocation context.
//!
//! Implementation notes
//! - `git_xet_run` constructs a command that runs `git xet run-any -- <command...>` pointing at the `git-xet` test
//! binary (resolved through the `CARGO_BIN_EXE_git-xet` env var) called by `git`.
//! - On Windows the test sets the current directory to the `git-xet` build directory so the local `git-xet` executable
//! can be found and executed (local directory precedence is relied upon on Windows). On Unix the build directory is
//! prepended to `PATH` so the correct binary is found.
//! - The `run-any` command of `git-xet` is gated behind the `git-xet-for-integration-test` feature. The tests in this
//! file are ignored by default unless that feature is enabled in the test run.
//!
//! These tests provide confidence that environment inheritance from `git` to `git-xet` is
//! sufficient for locating and invoking the POSIX utilities bundled with Git for Windows,
//! including functional SSH execution to a local server.
use std::ffi::OsStr;
use std::path::Path;
use std::process::{Command, Output, Stdio};
use anyhow::Result;
use git_xet::test_utils::{GitLFSAuthenticateResponse, start_local_ssh_server};
/// Runs `git xet run-any -- <command...>` and returns the captured output.
///
/// `git` is started in the directory that contains the `git-xet` test binary (located
/// through the compile-time `CARGO_BIN_EXE_git-xet` variable). On Unix that directory is
/// additionally prepended to `PATH` for the child process.
///
/// # Errors
/// Returns an `io::Error` when the `PATH` for the child cannot be assembled, when `git`
/// cannot be spawned, or when waiting for its output fails.
fn git_xet_run<I, S>(command: I) -> std::io::Result<Output>
where
    I: IntoIterator<Item = S>,
    S: AsRef<OsStr>,
{
    let test_bin_path = env!("CARGO_BIN_EXE_git-xet");
    let buildpath = Path::new(test_bin_path)
        .parent()
        .expect("the path to the git-xet test binary always has a parent directory");

    let mut cmd = Command::new("git");
    cmd.args(["xet", "run-any", "--"]); // Add "--" to escape options ("-[]" or "--[]") in the actual command
    cmd.args(command);
    cmd.current_dir(buildpath); // on Windows local directory takes the precedence to find an executable

    #[cfg(unix)]
    {
        // Work on OS strings so that a missing or non-UTF-8 `PATH`, or a non-UTF-8 build
        // directory, neither panics nor silently turns into an empty `PATH` entry.
        let inherited = std::env::var_os("PATH").unwrap_or_default();
        let search_path =
            std::env::join_paths(std::iter::once(buildpath.to_path_buf()).chain(std::env::split_paths(&inherited)))
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))?;
        cmd.env("PATH", search_path);
    }

    cmd.stdout(Stdio::piped()).stderr(Stdio::piped()).spawn()?.wait_with_output()
}
/// Checks that a handful of POSIX utilities can be launched through
/// `git xet run-any` on Windows and produce the expected output.
#[test]
#[cfg(windows)]
#[cfg_attr(not(feature = "git-xet-for-integration-test"), ignore)]
fn test_access_posix_commands() -> Result<()> {
    let posix_commands_and_expected_output = [
        (vec!["env"], "PATH="),
        (vec!["chmod", "--version"], "chmod"),
        (vec!["sh", "-c", "echo hello"], "hello"),
        (vec!["ssh"], "usage: ssh"),
        (vec!["uname", "-s"], "MINGW64"),
    ];

    for (pc, expected_output) in posix_commands_and_expected_output {
        // Keep a printable form of the command for the failure messages below.
        let shown = pc.join(" ");
        let o = git_xet_run(pc)?;
        let stdout = String::from_utf8_lossy(&o.stdout);
        let stderr = String::from_utf8_lossy(&o.stderr);

        // Exit code 0 means the program could be found and started through the
        // `git` -> `git-xet` chain; a "program not found" failure surfaces as a non-zero
        // code. It does not say anything about the exit status of the program itself.
        assert_eq!(
            o.status.code(),
            Some(0),
            "`{shown}` could not be run through git-xet; stdout: {stdout:?}; stderr: {stderr:?}"
        );

        // The execution should output some text containing the expected output pattern, either through
        // stdout or stderr.
        assert!(
            stdout.contains(expected_output) || stderr.contains(expected_output),
            "`{shown}` did not print {expected_output:?}; stdout: {stdout:?}; stderr: {stderr:?}"
        );
    }

    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[cfg_attr(not(feature = "git-xet-for-integration-test"), ignore)]
async fn test_ssh_connect_through_ssh_cmd() -> Result<()> {
let (port, server_task) = start_local_ssh_server(None).await?;
let ssh_cmd = [
"ssh",
"-p",
&port.to_string(),
"-o",
"StrictHostKeyChecking no",
"git@localhost",
"git-lfs-authenticate",
"user/repo",
"upload",
];
let o = git_xet_run(ssh_cmd)?;
let response: GitLFSAuthenticateResponse = serde_json::from_slice(&o.stdout)?;
assert!(response.header.authorization.starts_with("Basic "));
assert_eq!(response.href, "https://huggingface.co/user/repo.git/info/lfs");
assert_eq!(response.expires_in, 3600);
server_task.abort();
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[cfg_attr(not(feature = "git-xet-for-integration-test"), ignore)]
async fn test_ssh_connect_through_sh_cmd() -> Result<()> {
let (port, server_task) = start_local_ssh_server(None).await?;
let sh_cmd = [
"sh",
"-c",
&format!("ssh -p {port} -o \"StrictHostKeyChecking no\" git@localhost git-lfs-authenticate user/repo upload"),
];
let o = git_xet_run(sh_cmd)?;
let response: GitLFSAuthenticateResponse = serde_json::from_slice(&o.stdout)?;
assert!(response.header.authorization.starts_with("Basic "));
assert_eq!(response.href, "https://huggingface.co/user/repo.git/info/lfs");
assert_eq!(response.expires_in, 3600);
server_task.abort();
Ok(())
}