//
// Syd: rock-solid application kernel
// src/syd.rs: Main entry point
//
// Copyright (c) 2023, 2024, 2025 Ali Polatel <alip@chesswob.org>
// Proxying code is based in part upon socksns crate which is:
//     Copyright (c) 2020 Steven Engler
//     SPDX-License-Identifier: MIT
//
// SPDX-License-Identifier: GPL-3.0

//! Syd: rock-solid application kernel
//! Main entry point.

// We like clean and simple code with documentation.
// Keep in sync with lib.rs.
#![deny(missing_docs)]
#![deny(clippy::arithmetic_side_effects)]
#![deny(clippy::as_ptr_cast_mut)]
#![deny(clippy::as_underscore)]
#![deny(clippy::assertions_on_result_states)]
#![deny(clippy::borrow_as_ptr)]
#![deny(clippy::branches_sharing_code)]
#![deny(clippy::case_sensitive_file_extension_comparisons)]
#![deny(clippy::cast_lossless)]
#![deny(clippy::cast_possible_truncation)]
#![deny(clippy::cast_possible_wrap)]
#![deny(clippy::cast_precision_loss)]
#![deny(clippy::cast_ptr_alignment)]
#![deny(clippy::cast_sign_loss)]
#![deny(clippy::checked_conversions)]
#![deny(clippy::clear_with_drain)]
#![deny(clippy::clone_on_ref_ptr)]
#![deny(clippy::cloned_instead_of_copied)]
#![deny(clippy::cognitive_complexity)]
#![deny(clippy::collection_is_never_read)]
#![deny(clippy::copy_iterator)]
#![deny(clippy::create_dir)]
#![deny(clippy::dbg_macro)]
#![deny(clippy::debug_assert_with_mut_call)]
#![deny(clippy::decimal_literal_representation)]
#![deny(clippy::default_trait_access)]
#![deny(clippy::default_union_representation)]
#![deny(clippy::derive_partial_eq_without_eq)]
#![deny(clippy::doc_link_with_quotes)]
#![deny(clippy::doc_markdown)]
#![deny(clippy::explicit_into_iter_loop)]
#![deny(clippy::explicit_iter_loop)]
#![deny(clippy::fallible_impl_from)]
#![deny(clippy::missing_safety_doc)]
#![deny(clippy::undocumented_unsafe_blocks)]

use std::{
    env,
    env::VarError,
    ffi::{OsStr, OsString},
    fs::{File, OpenOptions},
    io::{stdout, BufWriter, Write},
    net::{IpAddr, SocketAddrV4, SocketAddrV6},
    os::{
        fd::AsRawFd,
        unix::{ffi::OsStrExt, fs::OpenOptionsExt, net::UnixStream, process::CommandExt},
    },
    path::Path,
    process::{ExitCode, Stdio},
    str::FromStr,
};

use data_encoding::HEXLOWER;
use libseccomp::{scmp_cmp, ScmpAction, ScmpFilterContext, ScmpSyscall};
use nix::{
    errno::Errno,
    fcntl::{open, OFlag},
    libc::setdomainname,
    pty::{grantpt, posix_openpt, ptsname_r, unlockpt},
    sched::{unshare, CloneFlags},
    sys::{
        socket::{bind, socket, AddressFamily, SockFlag, SockType, SockaddrIn, SockaddrIn6},
        stat::Mode,
        wait::{Id, WaitPidFlag},
    },
    time::{clock_gettime, ClockId},
    unistd::{fork, getgid, getpid, getuid, isatty, sethostname, ForkResult},
};
use sendfd::SendWithFd;
use syd::{
    caps,
    compat::{waitid, WaitStatus},
    config::*,
    confine::ExportMode,
    err::err2no,
    error,
    fs::{closeexcept, format_clone_flags, format_clone_names, pidfd_open, set_cloexec},
    hash::{get_at_random_hex, hash, HashAlgorithm, SydHashMap, SydHashSet},
    hook::Supervisor,
    ignore_signals, info,
    landlock_policy::LandlockPolicy,
    log::log_init,
    loopback_set_up,
    path::XPathBuf,
    sandbox::Sandbox,
    seal::ensure_sealed,
    set_sigpipe_dfl, syd_code_name, syd_info,
    syslog::LogLevel,
    unshare::{GidMap, UidMap},
    warn, IgnoreSignalOpts,
};

// Set global allocator to mimalloc.
// This definition is only compiled on targets with 64-bit pointers
// and only when the "prof" feature is disabled; in every other
// configuration it is compiled out.
#[cfg(all(not(feature = "prof"), target_pointer_width = "64"))]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

// Set global allocator to tcmalloc if profiling is enabled.
// This definition is compiled whenever the "prof" feature is on,
// regardless of pointer width. Its cfg condition is mutually
// exclusive with the mimalloc one (which requires "prof" to be off),
// so at most one `GLOBAL` is ever defined.
#[cfg(feature = "prof")]
#[global_allocator]
static GLOBAL: tcmalloc::TCMalloc = tcmalloc::TCMalloc;

syd::main! {
    use lexopt::prelude::*;

    // Initialize logging.
    log_init(LogLevel::Warn, Some(libc::STDERR_FILENO))?;

    // Parse CLI options.
    //
    // Note, option parsing is POSIXly correct:
    // POSIX recommends that no more options are parsed after the first
    // positional argument. The other arguments are then all treated as
    // positional arguments.
    // See: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap12.html#tag_12_02
    let mut parser = lexopt::Parser::from_env();
    let is_login = parser
        .bin_name()
        .map(|name| name.starts_with('-'))
        .unwrap_or(false);
    let mut is_quick = env::var_os(ENV_QUICK_BOOT).is_some();

    // Handle quick options early before reexecution for convenience.
    if !is_login {
        if let Some(raw) = parser.try_raw_args() {
            if let Some(Some(arg)) = raw.peek().map(|arg| arg.to_str()) {
                match arg {
                    "-h" | "--help" => {
                        set_sigpipe_dfl()?;
                        help();
                        return Ok(ExitCode::SUCCESS);
                    }
                    "-C" | "--check" => {
                        set_sigpipe_dfl()?;
                        syd_info(true)?;
                        return Ok(ExitCode::SUCCESS);
                    }
                    "-V" | "--version" => {
                        set_sigpipe_dfl()?;
                        syd_info(false)?;
                        return Ok(ExitCode::SUCCESS);
                    }
                    "--el" => {
                        set_sigpipe_dfl()?;
                        stdout().write_all(SYD_EL.as_bytes())?;
                        return Ok(ExitCode::SUCCESS);
                    }
                    "--sh" => {
                        set_sigpipe_dfl()?;
                        stdout().write_all(ESYD_SH.as_bytes())?;
                        return Ok(ExitCode::SUCCESS);
                    }
                    "--api" => {
                        set_sigpipe_dfl()?;
                        #[expect(clippy::disallowed_methods)]
                        let api = serde_json::to_string_pretty(&*syd::api::API_SPEC).expect("JSON");
                        stdout().write_all(api.as_bytes())?;
                        return Ok(ExitCode::SUCCESS);
                    }
                    "-q" => is_quick = true,
                    _ => {}
                }
            }
        }
    }

    // Guard against CVE-2019-5736:
    // Copy /proc/self/exe into an anonymous fd (created via memfd_create), seal it and re-execute it.
    // See:
    // - https://github.com/opencontainers/runc/commit/0a8e4117e7f715d5fbeef398405813ce8e88558b
    // - https://github.com/lxc/lxc/commit/6400238d08cdf1ca20d49bafb85f4e224348bf9d
    // Note: syd's procfs protections are another layer of defense against this.
    #[expect(clippy::disallowed_methods)]
    let cookie = if !is_quick {
        match env::var(ENV_RAND) {
            Ok(cookie0) => {
                // Best-effort ensure cookie0 was not tampered.
                assert_eq!(cookie0.len(), 32,
                    "PANIC: Internal environment variable {ENV_RAND} tampered by user!");
                assert!(cookie0.bytes().all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()),
                    "PANIC: Internal environment variable {ENV_RAND} tampered by user!");
                let cookie1 = get_at_random_hex(false);
                env::set_var(ENV_RAND, format!("{cookie0}{cookie1}"));
                info!("ctx": "set_random_cookie",
                    "cookie": [&cookie0, &cookie1], "src": "AT_RANDOM",
                    "msg": format!("appended random cookie from AT_RANDOM {cookie0}+{cookie1}={cookie0}{cookie1} after memfd-reexec"));
            }
            Err(VarError::NotPresent) => {
                let cookie = get_at_random_hex(false);
                env::set_var(ENV_RAND, &cookie);
                info!("ctx": "set_random_cookie",
                    "cookie": &cookie, "src": "AT_RANDOM",
                    "msg": format!("set random cookie from AT_RANDOM to {cookie}"));
            }
            Err(VarError::NotUnicode(cookie)) => {
                error!("ctx": "set_random_cookie",
                    "cookie": &cookie, "src": "AT_RANDOM", "err": libc::EINVAL,
                    "msg": format!("get random cookie from {ENV_RAND} failed: {}", Errno::EINVAL));
            }
        }

        match ensure_sealed() {
            Ok(()) => env::var(ENV_RAND).unwrap(),
            Err(errno) => {
                error!("ctx": "memfd_reexec",
                    "err": errno as i32,
                    "msg": format!("reexecute self with a sealed memfd failed: {errno}"),
                    "tip": "set SYD_QUICK_BOOT and/or submit a bug report");
                return Err(errno.into());
            }
        }
    } else {
        // See seal.rs for the other branch.
        // Rest is handled in unshare/child.rs
        match env::var_os("RUST_BACKTRACE") {
            Some(val) => env::set_var("SYD_RUST_BACKTRACE", val),
            None => env::remove_var("SYD_RUST_BACKTRACE"),
        };
        env::set_var("RUST_BACKTRACE", "0");
        env::set_var(ENV_RAND, get_at_random_hex(false));
        env::var(ENV_RAND).unwrap()
    };

    // Generate unique sandbox id from AT_RANDOM bytes.
    // Allow the user to override by setting SYD_ID.
    // Panic if SYD_ID is incorrectly formatted.
    #[expect(clippy::disallowed_methods)]
    if let Some(sandbox_id) = env::var_os(ENV_ID) {
        assert_eq!(sandbox_id.len(), 128,
            "PANIC: Sandbox ID in SYD_ID environment variable isn't in correct format!");
        assert!(sandbox_id.as_bytes().iter().all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()),
            "PANIC: Sandbox ID in SYD_ID environment variable isn't in correct format!");
        let machine_id = &sandbox_id.as_bytes()[..32];
        assert!(machine_id.iter().any(|&b| b != b'0'),
            "PANIC: Sandbox ID in SYD_ID environment variable isn't in correct format!");
    } else {
        let sandbox_id = HEXLOWER.encode(&hash(cookie.as_bytes(), HashAlgorithm::Sha512).unwrap());
        env::set_var(ENV_ID, &sandbox_id);
        info!("ctx": "set_sandbox_id",
            "id": &sandbox_id, "cookie": &cookie, "hash": "sha3-512",
            "msg": format!("generated syd id:{sandbox_id} from cookie:{cookie} using SHA3-512"));
    }

    // SYD_PID_FN -> Write PID file.
    if let Some(pid_fn) = env::var_os(ENV_PID_FN).map(XPathBuf::from) {
        let pid = getpid().as_raw();

        let mut pid_str = itoa::Buffer::new();
        let pid_str = pid_str.format(pid);

        let mut openopts = OpenOptions::new();
        openopts
            .mode(0o400)
            .write(true)
            .create_new(true);
        #[expect(clippy::disallowed_methods)]
        let mut pid_file = match openopts.open(&pid_fn).map(BufWriter::new) {
            Ok(pid_file) => pid_file,
            Err(error) => {
                let errno = err2no(&error);
                error!("ctx": "write_pid_file",
                    "pid_file": &pid_fn, "err": errno as i32,
                    "msg": format!("pid file create error: {error}"),
                    "tip": format!("remove file `{pid_fn}' or unset SYD_PID_FN"));
                return Err(error.into());
            }
        };

        match pid_file.write_all(pid_str.as_bytes()) {
            Ok(_) => {
                info!("ctx": "write_pid_file",
                    "msg": format!("Syd pid {pid} written to file `{pid_fn}'"),
                    "pid_file": &pid_fn);
            }
            Err(error) => {
                let errno = err2no(&error);
                error!("ctx": "write_pid_file",
                    "pid_fn": &pid_fn, "err": errno as i32,
                    "msg": format!("pid file write error: {error}"),
                    "tip": format!("remove file `{pid_fn}' or unset SYD_PID_FN"));
                return Err(error.into());
            }
        }
    }

    // Parse CLI arguments
    let mut export: Option<ExportMode> = ExportMode::from_env();
    let mut sandbox: Sandbox = Sandbox::default();
    let mut cmd_arg0: Option<OsString> = None;
    let mut cmd_argv: Vec<OsString> = vec![];
    let mut cmd_envp: SydHashSet<OsString> = SydHashSet::default();

    // SYD_PROXY_{HOST,PORT,UNIX} -> proxy/ext/{host,port,unix}
    #[expect(clippy::disallowed_methods)]
    match env::var(ENV_PROXY_HOST) {
        Ok(host) => sandbox
            .config(&format!("proxy/ext/host:{host}"))
            .expect(ENV_PROXY_HOST),
        Err(env::VarError::NotPresent) => {}
        Err(error) => panic!("Invalid UTF-8 in {ENV_PROXY_HOST}: {error}"),
    };
    #[expect(clippy::disallowed_methods)]
    match env::var(ENV_PROXY_PORT) {
        Ok(port) => sandbox
            .config(&format!("proxy/ext/port:{port}"))
            .expect(ENV_PROXY_PORT),
        Err(env::VarError::NotPresent) => {}
        Err(error) => panic!("Invalid UTF-8 in {ENV_PROXY_PORT}: {error}"),
    };
    #[expect(clippy::disallowed_methods)]
    match env::var(ENV_PROXY_UNIX) {
        Ok(unix) => sandbox
            .config(&format!("proxy/ext/unix:{unix}"))
            .expect(ENV_PROXY_UNIX),
        Err(env::VarError::NotPresent) => {}
        Err(error) => panic!("Invalid UTF-8 in {ENV_PROXY_UNIX}: {error}"),
    };

    // Initialize Options.
    let mut user_parse = false;
    let user_done = if is_login
        || parser
            .try_raw_args()
            .map(|raw| raw.peek().is_none())
            .unwrap_or(true)
    {
        sandbox.parse_profile("user")?;
        true
    } else {
        false
    };

    // Local options handled by this function.
    let sh = env::var_os(ENV_SH).unwrap_or(OsString::from(SYD_SH));

    while let Some(arg) = parser.next()? {
        match arg {
            /*
             * Basic options
             */
            Short('h') | Long("help") => {
                set_sigpipe_dfl()?;
                help();
                return Ok(ExitCode::SUCCESS);
            }
            Short('C') | Long("check") => {
                set_sigpipe_dfl()?;
                syd_info(true)?;
                return Ok(ExitCode::SUCCESS);
            }
            // syd -V is called often by paludis.
            // We want to keep its output short and parseable.
            Short('V') | Long("version") => {
                set_sigpipe_dfl()?;
                syd_info(false)?;
                return Ok(ExitCode::SUCCESS);
            }
            Short('v') | Long("verbose") => sandbox.verbose = true,
            Long("el") => {
                set_sigpipe_dfl()?;
                stdout().write_all(SYD_EL.as_bytes())?;
                return Ok(ExitCode::SUCCESS);
            }
            Long("sh") => {
                set_sigpipe_dfl()?;
                stdout().write_all(ESYD_SH.as_bytes())?;
                return Ok(ExitCode::SUCCESS);
            }
            Long("api") => {
                set_sigpipe_dfl()?;
                #[expect(clippy::disallowed_methods)]
                let api = serde_json::to_string_pretty(&*syd::api::API_SPEC).expect("JSON");
                stdout().write_all(api.as_bytes())?;
                return Ok(ExitCode::SUCCESS);
            }
            Short('q') => {} // Ignore, must be first!

            /*
             * Sandbox options
             */
            Short('E') => {
                export = Some(
                    parser
                        .value()?
                        .parse::<String>()
                        .map(|arg| ExportMode::from_str(&arg))??,
                );
            }
            Short('x') => sandbox.parse_profile("trace")?,
            Short('m') => {
                let cmd = parser.value().map(XPathBuf::from)?;
                if sandbox.locked() {
                    eprintln!("Failed to execute magic command `{cmd}': sandbox locked!");
                    return Err(Errno::EBUSY.into());
                } else {
                    sandbox.config(&cmd.to_string())?;
                }
            }
            Short('f') => {
                // Login shell compatibility:
                // Parse user profile as necessary.
                user_parse = true;
            }
            Short('l') | Long("login") => {
                // Login shell compatibility:
                // Parse user profile as necessary.
                user_parse = true;
            }
            Short('c') => {
                // When multiple -c arguments are given,
                // only the first one is honoured and
                // the rest are ignored, consistent
                // with how bash and dash behave.
                user_parse = true;
                if cmd_argv.is_empty() {
                    cmd_argv.push(sh.clone());
                    cmd_argv.push(OsString::from("-c"));
                    cmd_argv.push(parser.value()?);
                }
            }
            Short('P') => {
                let path = parser.value().map(XPathBuf::from)?;
                if sandbox.locked() {
                    eprintln!("Failed to parse config file `{path}': sandbox locked!");
                    return Err(Errno::EBUSY.into());
                }
                sandbox.parse_config_file(&path)?;
            }
            /* We keep --profile for syd-1 compatibility.
             * It's undocumented. */
            Short('p') | Long("profile") => {
                let profile = parser.value()?.parse::<String>()?;
                if sandbox.locked() {
                    eprintln!("Failed to parse profile `{profile}': sandbox locked!");
                    return Err(Errno::EBUSY.into());
                }
                sandbox.parse_profile(&profile)?;
            }

            /*
             * Unshare options
             */
            Short('a') => cmd_arg0 = Some(parser.value()?),
            Short('e') => {
                let value = parser.value()?.parse::<String>()?;
                match value.split_once('=') {
                    Some((var, val)) => {
                        cmd_envp.insert(OsString::from(var));
                        if !val.is_empty() {
                            // This way we give the user the chance to pass-through
                            // denylisted environment variables e.g.
                            //      syd -eLD_LIBRARY_PATH= cmd
                            // is equivalent to
                            //      syd -eLD_LIBRARY_PATH=$LD_LIBRARY_PATH cmd
                            env::set_var(var, val);
                        }
                    }
                    None => {
                        cmd_envp.remove(&OsString::from(value.clone()));
                        env::remove_var(value);
                    }
                }
            }

            // Profiling options.
            #[cfg(feature = "prof")]
            Long("prof") => match parser.value()?.parse::<String>()?.as_str() {
                "cpu" => env::set_var("SYD_PROF", "cpu"),
                "mem" => env::set_var("SYD_PROF", "mem"),
                val => {
                    eprintln!("Invalid profile mode `{val}'!");
                    eprintln!("Expected exactly one of `cpu' or `mem'!");
                    help();
                    return Ok(ExitCode::FAILURE);
                }
            },

            Value(prog) => {
                cmd_argv.push(prog);
                cmd_argv.extend(parser.raw_args()?);
            }
            _ => return Err(arg.unexpected().into()),
        }
    }

    if let Some(export_mode) = export {
        // SYD_DUMP_SCMP makes setup_seccomp_parent print rules.
        // In addition per-thread filters are printed out.
        match export_mode {
            ExportMode::BerkeleyPacketFilter => env::set_var(ENV_DUMP_SCMP, "bpf"),
            ExportMode::PseudoFiltercode => env::set_var(ENV_DUMP_SCMP, "pfc"),
        }

        // Note, we do not interfere with the sandbox policy here, and let
        // the user configure it through other means. This way the user
        // can dump seccomp filters for different sets of options.
    }

    if user_parse && !user_done && !sandbox.locked() {
        sandbox.parse_profile("user")?;
    }

    // Prepare the command to execute, which may be a login shell.
    let argv0 = if !cmd_argv.is_empty() {
        Some(cmd_argv.remove(0))
    } else {
        None
    };

    let argv0 = match (export.is_some(), argv0, is_login) {
        (true, _, _) => OsString::from(OsStr::from_bytes(MAGIC_FILE)),
        (false, Some(argv0), false) => argv0,
        (false, None, false) | (false, _, true) => {
            #[expect(clippy::disallowed_methods)]
            if cmd_arg0.is_none() {
                // Allow user to override with -a.
                cmd_arg0 = Some(match Path::new(&sh).file_name() {
                    None => OsString::from("-"),
                    Some(name) => {
                        let mut p = OsString::from("-");
                        p.push(name);
                        p
                    }
                });
            }
            sh
        }
    };

    // Ignore all signals except the following signals:
    // SIGALRM, SIGCHLD, SIGKILL, SIGSTOP.
    // Skip ignoring signals with default action Core,
    // if trace/allow_unsafe_prlimit:1 is set at startup.
    let mut opts = IgnoreSignalOpts::SkipIgnoreAlarm;
    if sandbox.flags.allow_unsafe_prlimit() {
        opts.insert(IgnoreSignalOpts::SkipIgnoreCoreDump);
    }
    match ignore_signals(opts) {
        Ok(()) => {
            info!("ctx": "ignore_signals",
                "opt": opts, "msg": "ignored signals");
        }
        Err(errno) => {
            error!("ctx": "ignore_signals",
                "opt": opts, "err": errno as i32,
                "msg": format!("ignoring signals failed: {errno}"),
                "tip": "check with SYD_LOG=debug and/or submit a bug report");
            return Err(errno.into());
        }
    }

    // SAFETY: We cannot support NEWPID without NEWNS,
    // i.e., a pid namespace must have its own private /proc.
    // Requesting either namespace below therefore enables the other one too.
    if sandbox.flags.unshare_pid() {
        sandbox.set_unshare_mount(true);
    }
    if sandbox.flags.unshare_mount() {
        sandbox.set_unshare_pid(true);
    }

    let pty_debug = env::var_os("SYD_PTY_DEBUG").is_some();
    let pty_child = if sandbox.has_pty()
        && isatty(std::io::stdin()).unwrap_or(false)
        && isatty(std::io::stdout()).unwrap_or(false)
    {
        // Step 1: Create a PIDFd of this process and clear O_CLOEXEC.
        let pidfd = pidfd_open(getpid(), libc::PIDFD_NONBLOCK)?;
        set_cloexec(&pidfd, false)?;

        // Step 2: Open a pseudoterminal device without O_CLOEXEC.
        let pty_m = posix_openpt(OFlag::O_RDWR | OFlag::O_NOCTTY | OFlag::O_NOFOLLOW)?;
        grantpt(&pty_m)?;
        unlockpt(&pty_m)?;
        let pty_s = ptsname_r(&pty_m).map(XPathBuf::from)?;
        #[expect(clippy::disallowed_methods)]
        let pty_s = open(
            &pty_s,
            OFlag::O_RDWR | OFlag::O_NOCTTY | OFlag::O_NOFOLLOW | OFlag::O_CLOEXEC,
            Mode::empty(),
        )?;

        // Step 3: Prepare environment of the syd-pty process.
        // Filter the environment variables to only include the list below:
        // 1. PATH
        // 2. LD_LIBRARY_PATH
        // 3. SYD_PTY_DEBUG
        // 4. SYD_PTY_RULES
        let safe_env: SydHashMap<_, _> = env::vars_os()
            .filter(|(key, _)| {
                matches!(
                    key.as_bytes(),
                    b"PATH" | b"LD_LIBRARY_PATH" | b"SYD_PTY_DEBUG" | b"SYD_PTY_RULES"
                )
            })
            .collect();

        // Step 5: Spawn syd-pty process, and pass PTY main end to it.
        let mut cmd = std::process::Command::new(
            env::var_os("CARGO_BIN_EXE_syd-pty").unwrap_or(OsString::from("syd-pty")),
        );
        if pty_debug {
            cmd.arg("-d");
        }
        cmd.arg(format!("-p{}", pidfd.as_raw_fd()));
        cmd.arg(format!("-i{}", pty_m.as_raw_fd()));
        if let Some(ws) = sandbox.pty_ws_x() {
            cmd.arg(format!("-x{ws}"));
        }
        if let Some(ws) = sandbox.pty_ws_y() {
            cmd.arg(format!("-y{ws}"));
        }
        cmd.env_clear();
        cmd.envs(safe_env);
        cmd.stdin(Stdio::inherit());
        cmd.stdout(Stdio::inherit());
        if !pty_debug {
            cmd.stderr(Stdio::null());
        }
        if let Err(error) = cmd.spawn() {
            let errno = err2no(&error);
            error!("ctx": "spawn_pty",
                "msg": format!("syd-pty spawn error: {error}"),
                "tip": "put syd-pty(1) in PATH, set CARGO_BIN_EXE_syd-pty, or set sandbox/pty:off",
                "err": errno as i32);
            return Err(error.into());
        }
        drop(pidfd);
        drop(pty_m);
        if pty_debug {
            warn!("ctx": "spawn_pty",
                "msg": "syd-pty is now forwarding terminal I/O");
        } else {
            info!("ctx": "spawn_pty",
                "msg": "syd-pty is now forwarding terminal I/O");
        }

        // Step 6: Pass the other end of the PTY pair
        // to the sandbox process.
        env::set_var(ENV_PTY_FD, pty_s.as_raw_fd().to_string());
        Some(pty_s)
    } else {
        env::remove_var(ENV_PTY_FD);
        None
    };

    let proxy_debug = env::var_os("SYD_TOR_DEBUG").is_some();
    let proxy = if sandbox.has_proxy() {
        // sandbox/proxy:on implies unshare/net:1
        sandbox.set_unshare_net(true);

        // Step 1: Create a PIDFd of this process.
        let pidfd = pidfd_open(getpid(), libc::PIDFD_NONBLOCK)?;

        // Step 2: Create a UNIX socket pair.
        let (stream_parent, stream_child) = UnixStream::pair()?;

        // Step 3: Unset the CLOEXEC flags on the file descriptors.
        // PIDFds and Rust sockets are automatically CLOEXEC.
        set_cloexec(&pidfd, false)?;
        set_cloexec(&stream_parent, false)?;

        // Step 4: Prepare environment of the syd-tor process.
        // Filter the environment variables to only include the list below:
        // 1. PATH
        // 2. LD_LIBRARY_PATH
        // 3. SYD_TOR_DEBUG
        // 4. SYD_TOR_RULES
        let safe_env: SydHashMap<_, _> = env::vars_os()
            .filter(|(key, _)| {
                matches!(
                    key.as_bytes(),
                    b"PATH" | b"LD_LIBRARY_PATH" | b"SYD_TOR_DEBUG" | b"SYD_TOR_RULES"
                )
            })
            .collect();

        // Step 5: Spawn syd-tor process outside the namespace.
        // Pass one end of the socket-pair to it.
        let mut cmd = std::process::Command::new(
            env::var_os("CARGO_BIN_EXE_syd-tor").unwrap_or(OsString::from("syd-tor")),
        );
        if proxy_debug {
            cmd.arg("-d");
        }
        cmd.arg(format!("-p{}", pidfd.as_raw_fd()));
        cmd.arg(format!("-i{}", stream_parent.as_raw_fd()));
        // proxy/ext/unix has precedence over proxy/ext/host.
        if let Some(ref proxy_ext_unix) = sandbox.proxy_ext_unix {
            cmd.arg("-u");
            cmd.arg(proxy_ext_unix);
        } else {
            cmd.arg("-o");
            cmd.arg(format!(
                "{}:{}",
                sandbox.proxy_ext_addr, sandbox.proxy_ext_port
            ));
        }
        cmd.env_clear();
        cmd.envs(safe_env);
        cmd.process_group(0);
        cmd.stdin(Stdio::null());
        cmd.stdout(Stdio::null());
        if !proxy_debug {
            cmd.stderr(Stdio::null());
        }
        if let Err(error) = cmd.spawn() {
            let errno = err2no(&error);
            error!("ctx": "spawn_tor",
                "msg": format!("syd-tor spawn error: {error}"),
                "tip": "put syd-tor(1) in PATH, set CARGO_BIN_EXE_syd-tor, or set sandbox/proxy:off",
                "err": errno as i32);
            return Err(error.into());
        }
        drop(pidfd);

        let proxy_repr = sandbox
            .proxy_ext_unix
            .as_ref()
            .map(|proxy_ext_unix| proxy_ext_unix.to_string())
            .unwrap_or_else(|| format!("{}!{}", sandbox.proxy_ext_addr, sandbox.proxy_ext_port));
        if proxy_debug {
            warn!("ctx": "spawn_proxy",
                "msg": format!("proxy is now forwarding external traffic to {proxy_repr}"));
        } else {
            info!("ctx": "spawn_proxy",
                "msg": format!("proxy is now forwarding external traffic to {proxy_repr}"));
        }

        // Step 6: Pass the other end of the socket-pair
        // to the new namespace.
        Some((stream_child, sandbox.proxy_port))
    } else {
        None
    };

    // Set up Linux namespaces if requested. Note,
    // we set it up here before spawning the child so as to
    // include the Syd process into the pid namespace as well
    // such that the sandbox process and syd have the identical
    // view of /proc.
    let namespaces = sandbox.flags.namespaces();
    if namespaces == 0 {
        // No namespace arguments passed, run normally.
        return match Supervisor::run(
            sandbox,
            pty_child,
            &argv0,
            cmd_argv,
            Some(&cmd_envp),
            cmd_arg0,
        ) {
            Ok(code) => Ok(ExitCode::from(code)),
            Err(error) => {
                let errno = Errno::last();
                eprintln!("{error:?}");
                Ok(ExitCode::from(u8::try_from(errno as i32).unwrap_or(127)))
            }
        };
    }

    let id_buf = if sandbox.flags.unshare_user() {
        // create the UID and GID mappings.
        let uid = getuid().as_raw();
        let gid = getgid().as_raw();

        let map_root = sandbox.flags.map_root();

        let uid_buf = {
            let uid_maps = vec![
                UidMap {
                    inside_uid: if map_root { 0 } else { uid },
                    outside_uid: uid,
                    count: 1,
                }, // Map the current user.
            ];
            let mut buf = Vec::new();
            for map in uid_maps {
                writeln!(
                    &mut buf,
                    "{} {} {}",
                    map.inside_uid, map.outside_uid, map.count
                )?;
            }
            buf
        };

        let gid_buf = {
            let gid_maps = vec![
                GidMap {
                    inside_gid: if map_root { 0 } else { gid },
                    outside_gid: gid,
                    count: 1,
                }, // Map the current group.
            ];
            let mut buf = Vec::new();
            for map in gid_maps {
                writeln!(
                    &mut buf,
                    "{} {} {}",
                    map.inside_gid, map.outside_gid, map.count
                )?;
            }
            buf
        };
        Some((uid_buf, gid_buf))
    } else {
        None
    };

    // Tell the kernel to keep the capabilities after the unshare call.
    // This is important because unshare() can change the user
    // namespace, which often leads to a loss of capabilities.
    caps::securebits::set_keepcaps(true)?;

    // CLONE_NEWTIME may only be used with unshare(2).
    // CloneFlags don't support CLONE_NEWTIME directly so we use retain.
    let clone_flags = CloneFlags::from_bits_retain(namespaces);
    let clone_names = format_clone_flags(clone_flags);
    let clone_types = format_clone_names(&clone_names);
    match unshare(clone_flags) {
        Ok(()) => {
            info!("ctx": "unshare_namespaces",
                "ns": clone_names,
                "msg": format!("unshared into {clone_types}"));
        }
        Err(errno) => {
            error!("ctx": "unshare_namespaces",
                "ns": clone_names, "err": errno as i32,
                "msg": format!("unshare into {clone_types} failed: {errno}"),
                "tip": "check with SYD_LOG=debug and/or set `unshare/user:1'");
            return Err(errno.into());
        }
    }

    // Write uid/gid map for user namespace.
    #[expect(clippy::disallowed_methods)]
    if let Some((ref uid_buf, ref gid_buf)) = id_buf {
        // Write "deny" to /proc/self/setgroups before writing to gid_map.
        File::create("/proc/self/setgroups").and_then(|mut f| f.write_all(b"deny"))?;
        File::create("/proc/self/gid_map").and_then(|mut f| f.write_all(&gid_buf[..]))?;
        File::create("/proc/self/uid_map").and_then(|mut f| f.write_all(&uid_buf[..]))?;

        // Set inheritable mask and ambient caps to retain caps after execve(2).
        caps::securebits::set_keepcaps(true)?;
        let permitted_caps = caps::read(None, caps::CapSet::Permitted)?;
        caps::set(None, caps::CapSet::Inheritable, permitted_caps)?;
        // Set the same capabilities as ambient, if necessary.
        for flag in permitted_caps {
            let cap = flag.try_into()?;
            caps::raise(None, caps::CapSet::Ambient, cap)?;
        }
    }

    // Bring up loopback device for net namespace.
    if sandbox.flags.unshare_net() {
        // Set up the loopback interface.
        // Warn on errors and continue.
        match loopback_set_up() {
            Ok(loindex) => {
                info!("ctx": "loopback_set_up",
                    "msg": format!("loopback network device is up with index:{loindex:#x}"),
                    "idx": loindex);
            }
            Err(errno) => {
                error!("ctx": "loopback_set_up",
                    "msg": format!("set up loopback network device error: {errno}"),
                    "err": errno as i32);
            }
        };

        // Handle proxy sandboxing.
        // Warn on errors and continue.
        if let Some((stream_child, proxy_port)) = proxy {
            let proxy_addr = sandbox.proxy_addr;
            let ipv = if proxy_addr.is_ipv6() { 6 } else { 4 };
            let fml = if ipv == 6 {
                AddressFamily::Inet6
            } else {
                AddressFamily::Inet
            };
            let lfd = socket(
                fml,
                SockType::Stream,
                SockFlag::SOCK_NONBLOCK | SockFlag::SOCK_CLOEXEC,
                None,
            )?;
            let ret = match proxy_addr {
                IpAddr::V4(addr_v4) => {
                    let sockaddr = SockaddrIn::from(SocketAddrV4::new(addr_v4, proxy_port));
                    bind(lfd.as_raw_fd(), &sockaddr)
                }
                IpAddr::V6(addr_v6) => {
                    let sockaddr = SockaddrIn6::from(SocketAddrV6::new(addr_v6, proxy_port, 0, 0));
                    bind(lfd.as_raw_fd(), &sockaddr)
                }
            };
            match ret {
                Ok(()) => {
                    if proxy_debug {
                        warn!("ctx": "bind_proxy",
                            "msg": format!("proxy is now listening incoming IPv{ipv} requests from {proxy_addr}!{proxy_port}"));
                    } else {
                        info!("ctx": "bind_proxy",
                            "msg": format!("proxy is now listening incoming IPv{ipv} requests from {proxy_addr}!{proxy_port}"));
                    }
                    let buf = [0u8; 1];
                    let fds = [lfd.as_raw_fd()];
                    match stream_child.send_with_fd(&buf, &fds) {
                        Ok(_) => {
                            if proxy_debug {
                                warn!("ctx": "send_proxy_fd",
                                    "msg": format!("proxy fd {} sent to syd-tor, IPv{ipv} traffic forwarding is now started \\o/",
                                        lfd.as_raw_fd()));
                                warn!("ctx": "send_proxy_fd",
                                    "syd": "ping",
                                    "msg": "Change return success. Going and coming without error.");
                            } else {
                                info!("ctx": "send_proxy_fd",
                                    "msg": format!("proxy fd {} sent to syd-tor, IPv{ipv} traffic forwarding is now started \\o/",
                                        lfd.as_raw_fd()));
                                info!("ctx": "send_proxy_fd", "syd": "ping", "msg": "Change return success. Going and coming without error.");
                            }
                        }
                        Err(error) => {
                            error!("ctx": "send_proxy_fd",
                                "msg": format!("proxy fd {} send to syd-tor error: {}, traffic will not be forwarded",
                                    lfd.as_raw_fd(), err2no(&error)),
                                "err": err2no(&error) as i32);
                        }
                    }
                }
                Err(errno) => {
                    error!("ctx": "bind_proxy",
                        "msg": format!("proxy bind to IPv{ipv} {proxy_addr}!{proxy_port} error: {errno}"),
                        "err": errno as i32);
                }
            }
            drop(stream_child);
        }
    }

    // Set host and domain name for uts namespace.
    // Log on errors and continue.
    if sandbox.flags.unshare_uts() {
        let domainname = sandbox.domainname.as_c_str();
        let domainsize = domainname.to_bytes().len(); // without NUL.
        let domainname = domainname.as_ptr().cast();

        // SAFETY: No setdomainname(2) wrapper in nix yet.
        if let Err(errno) = Errno::result(unsafe { setdomainname(domainname, domainsize) }) {
            error!("ctx": "uts", "op": "setdomainname",
                "msg": format!("set NIS/YP domain name failed: {errno}"),
                "err": errno as i32);
        }

        let hostname = sandbox.hostname.as_c_str();
        let hostname = OsStr::from_bytes(hostname.to_bytes());
        if let Err(errno) = sethostname(hostname) {
            error!("ctx": "uts", "op": "sethostname",
                "msg": format!("set host name failed: {errno}"),
                "err": errno as i32);
        }
    }

    if sandbox.flags.unshare_time() {
        #[expect(clippy::disallowed_methods)]
        if let Some(offset) = sandbox.time {
            let mut file = File::create("/proc/self/timens_offsets")?;
            let contents = format!("monotonic {offset} 0\nboottime {offset} 0\n");
            file.write_all(contents.as_bytes())?;
            info!("ctx": "set_boot_time", "msg": "set boot time in time namespace");
        } else if let Ok(boottime) = clock_gettime(ClockId::CLOCK_BOOTTIME) {
            // Set uptime to 0 for time namespace. Ignore errors.
            if let Ok(mut file) = File::create("/proc/self/timens_offsets") {
                #[expect(clippy::arithmetic_side_effects)]
                let contents = format!("boottime {} 0\n", -boottime.tv_sec());
                let _ = file.write_all(contents.as_bytes());
            }
            info!("ctx": "set_boot_time", "msg": "reset boot time in time namespace");
        }
    }

    // Disable Speculative Store Bypass mitigations
    // for trace/allow_unsafe_spec_exec:1
    let ssb = sandbox.flags.allow_unsafe_spec_exec();

    // SAFETY: fork is our safest option here.
    let child = match unsafe { fork() }? {
        ForkResult::Child => {
            let retval = match Supervisor::run(
                sandbox,
                pty_child,
                &argv0,
                cmd_argv,
                Some(&cmd_envp),
                cmd_arg0,
            ) {
                Ok(retval) => libc::c_int::from(retval),
                Err(error) => {
                    let errno = Errno::last();
                    eprintln!("{error:?}");
                    errno as i32
                }
            };
            // SAFETY: _exit is safe to call in child.
            unsafe { libc::_exit(retval) };
        }
        ForkResult::Parent { child, .. } => {
            // Ensure we release all unneeded resources.
            drop(cmd_arg0);
            drop(cmd_argv);
            drop(cmd_envp);
            drop(pty_child);
            drop(sandbox);

            child
        }
    };

    // Only parent ends up here and `child` is child pid.
    //
    // Unshare CLONE_FS|CLONE_FILES for isolation.
    // Close all file descriptors but standard error.
    unshare(CloneFlags::CLONE_FS | CloneFlags::CLONE_FILES)?;
    closeexcept(&[libc::STDERR_FILENO as libc::c_uint])?;

    // SAFETY: Set up a Landlock sandbox to disallow all access.
    let abi = syd::landlock::ABI::new_current();
    let policy = LandlockPolicy {
        scoped_abs: true,
        scoped_sig: true,

        ..Default::default()
    };
    let _ = policy.restrict_self(abi);

    // SAFETY: Set up a seccomp filter which only allows
    // 1. write to standard error.
    // 2. waitid and exit.
    // 3. memory allocation syscalls
    // 4. signal handling syscalls
    let mut ctx = ScmpFilterContext::new(ScmpAction::KillProcess)?;

    // Enforce the NO_NEW_PRIVS functionality before
    // loading the seccomp filter into the kernel.
    ctx.set_ctl_nnp(true)?;

    // Disable Speculative Store Bypass mitigations
    // with trace/allow_unsafe_spec_exec:1
    ctx.set_ctl_ssb(ssb)?;

    // DO NOT synchronize filter to all threads.
    // Main thread will confine itself.
    ctx.set_ctl_tsync(false)?;

    // We kill for bad system call and bad arch.
    ctx.set_act_badarch(ScmpAction::KillProcess)?;

    // Use a binary tree sorted by syscall number if possible.
    let _ = ctx.set_ctl_optimize(2);

    // SAFETY: Do NOT add supported architectures to the filter.
    // This ensures Syd can never run a non-native system call,
    // which we do not need at all.
    // seccomp_add_architectures(&mut ctx)?;

    let allow_call = [
        "exit",
        "exit_group",
        "waitid",
        "brk",
        "madvise",
        "mremap",
        "munmap",
        "sigaction",
        "sigaltstack",
        "sigpending",
        "sigprocmask",
        "sigsuspend",
        "sigreturn",
        "rt_sigaction",
        "rt_sigpending",
        "rt_sigprocmask",
        "rt_sigqueueinfo",
        "rt_sigreturn",
        "rt_sigtimedwait",
        "rt_sigtimedwait_time64",
        #[cfg(feature = "prof")]
        "getpid",
        #[cfg(feature = "prof")]
        "gettid",
    ];
    for name in allow_call {
        if let Ok(syscall) = ScmpSyscall::from_name(name) {
            ctx.add_rule(ScmpAction::Allow, syscall)?;
        }
    }

    // Allow write(2) to standard error.
    if let Ok(syscall) = ScmpSyscall::from_name("write") {
        ctx.add_rule_conditional(
            ScmpAction::Allow,
            syscall,
            &[scmp_cmp!($arg0 == libc::STDERR_FILENO as u64)],
        )?;
    }

    // Prevent executable memory.
    const PROT_EXEC: u64 = libc::PROT_EXEC as u64;
    for name in ["mmap", "mmap2", "mprotect"] {
        #[expect(clippy::disallowed_methods)]
        ctx.add_rule_conditional(
            ScmpAction::Allow,
            ScmpSyscall::from_name(name).unwrap(),
            &[scmp_cmp!($arg2 & PROT_EXEC == 0)],
        )?;
    }

    ctx.load()?;

    loop {
        #[expect(clippy::cast_possible_truncation)]
        #[expect(clippy::cast_sign_loss)]
        break match waitid(Id::Pid(child), WaitPidFlag::WEXITED) {
            Ok(WaitStatus::Exited(_, code)) =>
            {
                #[expect(clippy::cast_possible_truncation)]
                #[expect(clippy::cast_sign_loss)]
                Ok(ExitCode::from(code as u8))
            }
            Ok(WaitStatus::Signaled(_, signal, _)) => {
                Ok(ExitCode::from(128_u8.saturating_add(signal as u8)))
            }
            Ok(WaitStatus::StillAlive) | Err(Errno::EINTR) => continue,
            Ok(_status) => Err(Errno::EINVAL.into()),
            Err(errno) => Err(errno.into()),
        };
    }
}

/// Print the version banner, author/license lines and command synopsis
/// to standard output.
///
/// ANSI colour escapes are emitted only when standard output is a
/// terminal; otherwise (or when the terminal check fails) every colour
/// variable is the empty string and the text is printed plain.
fn help() {
    // Decide once whether to colourise, then pick the whole palette in a
    // single expression so the bindings can stay immutable.
    let use_color = isatty(std::io::stdout()).unwrap_or(false);
    let [c_blue, c_bold, c_cyan, c_green, c_orng, c_red, c_res, c_yll] = if use_color {
        [
            "\x1b[0;1;35;95m", // c_blue
            "\x1b[1m",         // c_bold
            "\x1b[0;1;36;96m", // c_cyan
            "\x1b[0;1;32;92m", // c_green
            "\x1b[0;1;34;94m", // c_orng
            "\x1b[0;1;31;91m", // c_red
            "\x1b[0m",         // c_res: reset all attributes
            "\x1b[0;1;33;93m", // c_yll
        ]
    } else {
        [""; 8]
    };

    // Banner: program name, version and code name.
    println!(
        "{c_red}syd{c_res} {c_cyan}{}{c_res} ({c_orng}{}{c_res})",
        *syd::config::VERSION,
        syd_code_name()
    );
    println!("{c_yll}Rock solid application kernel{c_res}");
    println!("{c_blue}Author:{c_res} {c_yll}Ali Polatel{c_res} <{c_bold}alip@chesswob.org{c_res}>");
    println!("{c_blue}License:{c_res} {c_yll}GPL-3.0-only{c_res}");
    println!();

    // Synopsis: each invocation form followed by a one-line description.
    println!("{c_green}$ syd [-acefhlmpqxEPV] [--] {{command [arg...]}}{c_res}");
    println!("  {c_bold}Run a program under syd.{c_res}");
    println!("{c_green}$ syd [-acefhlmpqxEPV] [--] {{library.so}}{c_res}");
    println!("  {c_bold}Load a library under syd.{c_res}");
    println!("{c_green}$ syd --api{c_res}");
    println!("  {c_bold}Print syd(2) API specification.{c_res}");
    println!("{c_green}$ syd --check{c_res}");
    println!("  {c_bold}Print sandboxing support information.{c_res}");
    println!("{c_green}$ syd --el{c_res}");
    println!("  {c_bold}Output syd.el the Emacs Lisp implementation of syd(2) interface.{c_res}");
    println!("{c_green}$ syd --sh{c_res}");
    println!("  {c_bold}Output a shell script which defines the esyd helper function.{c_res}");
    println!();

    // Trailer: the SEE_EMILY_PLAY text followed by bug-report instructions.
    print!("{SEE_EMILY_PLAY}");
    println!();
    println!("{c_orng}Send bug reports to{c_res} {c_bold}https://gitlab.exherbo.org/groups/sydbox/-/issues{c_res}");
    println!("{c_orng}Attaching poems encourages consideration tremendously.{c_res}");
}
