haruspex/
lib.rs

1//!
2//! haruspex - Tool to extract IDA decompiler's pseudo-code
3//! Copyright (c) 2024-2025 Marco Ivaldi <raptor@0xdeadbeef.info>
4//!
5//! > "Hacking is the discipline of questioning all your assumptions all of the time."
6//! >
7//! > -- Dave Aitel
8//!
9//! Haruspex is a blazing fast IDA Pro headless plugin that extracts pseudo-code generated by IDA Pro's
10//! decompiler in a format that should be suitable to be imported into an IDE, or parsed by static
11//! analysis tools such as [Semgrep](https://semgrep.dev/), [weggli](https://github.com/weggli-rs/weggli),
12//! or [oneiromancer](https://crates.io/crates/oneiromancer).
13//!
14//! ## Features
15//! * Blazing fast, headless user experience courtesy of IDA Pro 9 and Binarly's idalib Rust bindings.
16//! * Support for binary targets for any architecture implemented by IDA Pro's Hex-Rays decompiler.
//! * Pseudo-code of each function is stored in a separate file in the output directory for easy inspection.
18//! * External crates can invoke [`decompile_to_file`] to decompile a function and save its pseudo-code to disk.
19//!
20//! ## Blog post
21//! * <https://security.humanativaspa.it/streamlining-vulnerability-research-with-ida-pro-and-rust>
22//!
23//! ## See also
24//! * <https://github.com/0xdea/ghidra-scripts/blob/main/Haruspex.java>
25//! * <https://github.com/0xdea/semgrep-rules>
26//! * <https://github.com/0xdea/weggli-patterns>
27//! * <https://docs.hex-rays.com/release-notes/9_0#headless-processing-with-idalib>
28//! * <https://github.com/binarly-io/idalib>
29//! * <https://github.com/xorpse/parascope>
30//! * <https://security.humanativaspa.it/automating-binary-vulnerability-discovery-with-ghidra-and-semgrep>
31//!
32//! ## Installing
33//! The easiest way to get the latest release is via [crates.io](https://crates.io/crates/haruspex):
34//! 1. Download, install, and configure IDA Pro (see <https://hex-rays.com/ida-pro>).
35//! 2. Download and extract the IDA SDK (see <https://docs.hex-rays.com/developer-guide>).
36//! 3. Install LLVM/Clang (see <https://rust-lang.github.io/rust-bindgen/requirements.html>).
37//! 4. On Linux/macOS, install as follows:
38//!     ```sh
39//!     export IDASDKDIR=/path/to/idasdk
40//!     export IDADIR=/path/to/ida # if not set, the build script will check common locations
41//!     cargo install haruspex
42//!     ```
43//!    On Windows, instead, use the following commands:
44//!     ```powershell
45//!     $env:LIBCLANG_PATH="\path\to\clang+llvm\bin"
46//!     $env:PATH="\path\to\ida;$env:PATH"
47//!     $env:IDASDKDIR="\path\to\idasdk"
48//!     $env:IDADIR="\path\to\ida" # if not set, the build script will check common locations
49//!     cargo install haruspex
50//!     ```
51//!
52//! ## Compiling
53//! Alternatively, you can build from [source](https://github.com/0xdea/haruspex):
54//! 1. Download, install, and configure IDA Pro (see <https://hex-rays.com/ida-pro>).
55//! 2. Download and extract the IDA SDK (see <https://docs.hex-rays.com/developer-guide>).
56//! 3. Install LLVM/Clang (see <https://rust-lang.github.io/rust-bindgen/requirements.html>).
57//! 4. On Linux/macOS, compile as follows:
58//!     ```sh
59//!     git clone --depth 1 https://github.com/0xdea/haruspex
60//!     cd haruspex
61//!     export IDASDKDIR=/path/to/idasdk # or edit .cargo/config.toml
62//!     export IDADIR=/path/to/ida # if not set, the build script will check common locations
63//!     cargo build --release
64//!     ```
65//!    On Windows, instead, use the following commands:
66//!     ```powershell
67//!     git clone --depth 1 https://github.com/0xdea/haruspex
68//!     cd haruspex
69//!     $env:LIBCLANG_PATH="\path\to\clang+llvm\bin"
70//!     $env:PATH="\path\to\ida;$env:PATH"
71//!     $env:IDASDKDIR="\path\to\idasdk"
72//!     $env:IDADIR="\path\to\ida" # if not set, the build script will check common locations
73//!     cargo build --release
74//!     ```
75//!
76//! ## Usage
77//! 1. Make sure IDA Pro is properly configured with a valid license.
78//! 2. Run as follows:
79//!     ```sh
80//!     haruspex <binary_file>
81//!     ```
82//! 3. Find the extracted pseudo-code of each decompiled function in the `binary_file.dec` directory:
83//!     ```sh
84//!     vim <binary_file>.dec
85//!     code <binary_file>.dec
86//!     ```
87//!
88//! ## Compatibility
89//! * IDA Pro 9.0.240925 - Latest compatible: v0.1.3.
90//! * IDA Pro 9.0.241217 - Latest compatible: v0.4.2.
91//! * IDA Pro 9.1.250226 - Latest compatible: current version.
92//!
93//! *Note: check [idalib](https://github.com/binarly-io/idalib) documentation for additional information.*
94//!
95//! ## Changelog
96//! * <https://github.com/0xdea/haruspex/blob/master/CHANGELOG.md>
97//!
98//! ## TODO
99//! * Integrate with Semgrep scanning (see <https://github.com/0xdea/semgrep-rules>).
100//! * Integrate with weggli scanning (see <https://github.com/0xdea/weggli-patterns>).
101//! * Improve decompiler output in the style of [HexRaysPyTools](https://github.com/igogo-x86/HexRaysPyTools)
102//!   and [abyss](https://github.com/patois/abyss).
103//! * Implement parallel analysis (see <https://github.com/fugue-re/fugue-mptp>).
104//!
105
106#![doc(html_logo_url = "https://raw.githubusercontent.com/0xdea/haruspex/master/.img/logo.png")]
107
108use std::fs;
109use std::fs::File;
110use std::io::{BufWriter, Write};
111use std::path::Path;
112use std::sync::atomic::{AtomicUsize, Ordering};
113
114use anyhow::Context;
115use idalib::IDAError;
116use idalib::decompiler::HexRaysErrorCode;
117use idalib::func::{Function, FunctionFlags};
118use idalib::idb::IDB;
119use thiserror::Error;
120
/// Process-wide tally of the functions that were successfully decompiled
static COUNTER: AtomicUsize = AtomicUsize::new(0);

/// Characters that get replaced with `_` when a function name is turned into a filename
#[cfg(unix)]
const RESERVED_CHARS: &[char] = &['.', '/'];
#[cfg(windows)]
const RESERVED_CHARS: &[char] = &['.', '/', '<', '>', ':', '"', '\\', '|', '?', '*'];

/// Upper bound on the number of characters of a function name that are kept in a filename
const MAX_FILENAME_LEN: usize = 64;
132
133/// Haruspex error type
134#[derive(Error, Debug)]
135pub enum HaruspexError {
136    /// Failure in decompiling the function
137    #[error(transparent)]
138    DecompileFailed(#[from] IDAError),
139    /// Failure in writing to the output file
140    #[error(transparent)]
141    FileWriteFailed(#[from] std::io::Error),
142}
143
144/// Extract pseudo-code of functions in the binary file at `filepath` and save it in `filepath.dec`.
145///
146/// ## Errors
147///
148/// Returns how many functions were decompiled, or a generic error in case something goes wrong.
149pub fn run(filepath: &Path) -> anyhow::Result<usize> {
150    // Open the target binary and run auto-analysis
151    println!("[*] Trying to analyze binary file `{}`", filepath.display());
152    let idb = IDB::open(filepath)
153        .with_context(|| format!("Failed to analyze binary file `{}`", filepath.display()))?;
154    println!("[+] Successfully analyzed binary file");
155    println!();
156
157    // Print binary file information
158    println!("[-] Processor: {}", idb.processor().long_name());
159    println!("[-] Compiler: {:?}", idb.meta().cc_id());
160    println!("[-] File type: {:?}", idb.meta().filetype());
161    println!();
162
163    // Check if Hex-Rays decompiler is available
164    if !idb.decompiler_available() {
165        return Err(anyhow::anyhow!("Decompiler is not available"));
166    }
167
168    // Create a new output directory, returning an error if it already exists, and it's not empty
169    let dirpath = filepath.with_extension("dec");
170    println!("[*] Preparing output directory `{}`", dirpath.display());
171    if dirpath.exists() {
172        fs::remove_dir(&dirpath).map_err(|_| anyhow::anyhow!("Output directory already exists"))?;
173    }
174    fs::create_dir_all(&dirpath)
175        .with_context(|| format!("Failed to create directory `{}`", dirpath.display()))?;
176    println!("[+] Output directory is ready");
177
178    // Extract pseudo-code of functions
179    println!();
180    println!("[*] Extracting pseudo-code of functions...");
181    println!();
182    for (_id, f) in idb.functions() {
183        // Skip the function if it has the `thunk` attribute
184        if f.flags().contains(FunctionFlags::THUNK) {
185            continue;
186        }
187
188        // Decompile function and write pseudo-code to the output file
189        let func_name = f.name().unwrap_or_else(|| "<no name>".into());
190        let output_file = format!(
191            "{}@{:X}",
192            func_name
193                .replace(RESERVED_CHARS, "_")
194                .chars()
195                .take(MAX_FILENAME_LEN)
196                .collect::<String>(),
197            f.start_address()
198        );
199        let output_path = dirpath.join(output_file).with_extension("c");
200
201        match decompile_to_file(&idb, &f, &output_path) {
202            // Print the output path in case of successful function decompilation
203            Ok(()) => println!("{func_name} -> `{}`", output_path.display()),
204
205            // Return an error if Hex-Rays decompiler license is not available
206            Err(HaruspexError::DecompileFailed(IDAError::HexRays(e)))
207                if e.code() == HexRaysErrorCode::License =>
208            {
209                return Err(e.into());
210            }
211
212            // Ignore other IDA errors
213            Err(HaruspexError::DecompileFailed(_)) => continue,
214
215            // Return any other error
216            Err(e) => return Err(e.into()),
217        }
218
219        COUNTER.fetch_add(1, Ordering::Relaxed);
220    }
221
222    // Remove the output directory and return an error in case no functions were decompiled
223    if COUNTER.load(Ordering::Relaxed) == 0 {
224        fs::remove_dir(&dirpath)
225            .with_context(|| format!("Failed to remove directory `{}`", dirpath.display()))?;
226        return Err(anyhow::anyhow!(
227            "No functions were decompiled, check your input file"
228        ));
229    }
230
231    println!();
232    println!(
233        "[+] Decompiled {COUNTER:?} functions into `{}`",
234        dirpath.display()
235    );
236    println!("[+] Done processing binary file `{}`", filepath.display());
237    Ok(COUNTER.load(Ordering::Relaxed))
238}
239
240/// Decompile [`Function`] `func` in [`IDB`] `idb` and save its pseudo-code to the output file at `filepath`.
241///
242/// ## Errors
243///
244/// Returns the appropriate [`HaruspexError`] in case something goes wrong.
245///
246/// ## Examples
247///
248/// Basic usage:
249/// ```
250/// # fn main() -> anyhow::Result<()> {
251/// # let base_dir = std::path::Path::new("./tests/data");
252/// let input_file = base_dir.join("ls");
253/// let output_file = base_dir.join("ls-main.c");
254///
255/// let idb = idalib::idb::IDB::open(&input_file)?;
256/// let (_, func) = idb
257///     .functions()
258///     .find(|(_, f)| f.name().unwrap() == "main")
259///     .unwrap();
260///
261/// haruspex::decompile_to_file(&idb, &func, &output_file)?;
262/// # std::fs::remove_file(output_file)?;
263/// # Ok(())
264/// # }
265/// ```
266///
267pub fn decompile_to_file(
268    idb: &IDB,
269    func: &Function,
270    filepath: impl AsRef<Path>,
271) -> Result<(), HaruspexError> {
272    // Decompile function
273    let decomp = idb.decompile(func)?;
274    let source = decomp.pseudocode();
275
276    // Write pseudo-code to output file
277    // Note: for easier testing, we could use a generic function together with `std::io::Cursor`
278    let mut writer = BufWriter::new(File::create(&filepath)?);
279    writer.write_all(source.as_bytes())?;
280    writer.flush()?;
281
282    Ok(())
283}