haruspex/lib.rs
1//!
2//! haruspex - Tool to extract IDA decompiler's pseudo-code
3//! Copyright (c) 2024-2025 Marco Ivaldi <raptor@0xdeadbeef.info>
4//!
5//! > "Hacking is the discipline of questioning all your assumptions all of the time."
6//! >
7//! > -- Dave Aitel
8//!
9//! Haruspex is a blazing fast IDA Pro headless plugin that extracts pseudo-code generated by IDA Pro's
10//! decompiler in a format that should be suitable to be imported into an IDE, or parsed by static
11//! analysis tools such as [Semgrep](https://semgrep.dev/), [weggli](https://github.com/weggli-rs/weggli),
12//! or [oneiromancer](https://crates.io/crates/oneiromancer).
13//!
14//! ## Features
15//! * Blazing fast, headless user experience courtesy of IDA Pro 9 and Binarly's idalib Rust bindings.
16//! * Support for binary targets for any architecture implemented by IDA Pro's Hex-Rays decompiler.
17//! * Pseudo-code of each function is stored in a separate file in the output directory for easy inspection.
18//! * External crates can invoke [`decompile_to_file`] to decompile a function and save its pseudo-code to disk.
19//!
20//! ## Blog post
21//! * <https://security.humanativaspa.it/streamlining-vulnerability-research-with-ida-pro-and-rust>
22//!
23//! ## See also
24//! * <https://github.com/0xdea/ghidra-scripts/blob/main/Haruspex.java>
25//! * <https://github.com/0xdea/semgrep-rules>
26//! * <https://github.com/0xdea/weggli-patterns>
27//! * <https://docs.hex-rays.com/release-notes/9_0#headless-processing-with-idalib>
28//! * <https://github.com/binarly-io/idalib>
29//! * <https://github.com/xorpse/parascope>
30//! * <https://security.humanativaspa.it/automating-binary-vulnerability-discovery-with-ghidra-and-semgrep>
31//!
32//! ## Installing
33//! The easiest way to get the latest release is via [crates.io](https://crates.io/crates/haruspex):
34//! 1. Download, install, and configure IDA Pro (see <https://hex-rays.com/ida-pro>).
35//! 2. Download and extract the IDA SDK (see <https://docs.hex-rays.com/developer-guide>).
36//! 3. Install LLVM/Clang (see <https://rust-lang.github.io/rust-bindgen/requirements.html>).
37//! 4. On Linux/macOS, install as follows:
38//! ```sh
39//! export IDASDKDIR=/path/to/idasdk
40//! export IDADIR=/path/to/ida # if not set, the build script will check common locations
41//! cargo install haruspex
42//! ```
43//! On Windows, instead, use the following commands:
44//! ```powershell
45//! $env:LIBCLANG_PATH="\path\to\clang+llvm\bin"
46//! $env:PATH="\path\to\ida;$env:PATH"
47//! $env:IDASDKDIR="\path\to\idasdk"
48//! $env:IDADIR="\path\to\ida" # if not set, the build script will check common locations
49//! cargo install haruspex
50//! ```
51//!
52//! ## Compiling
53//! Alternatively, you can build from [source](https://github.com/0xdea/haruspex):
54//! 1. Download, install, and configure IDA Pro (see <https://hex-rays.com/ida-pro>).
55//! 2. Download and extract the IDA SDK (see <https://docs.hex-rays.com/developer-guide>).
56//! 3. Install LLVM/Clang (see <https://rust-lang.github.io/rust-bindgen/requirements.html>).
57//! 4. On Linux/macOS, compile as follows:
58//! ```sh
59//! git clone --depth 1 https://github.com/0xdea/haruspex
60//! cd haruspex
61//! export IDASDKDIR=/path/to/idasdk # or edit .cargo/config.toml
62//! export IDADIR=/path/to/ida # if not set, the build script will check common locations
63//! cargo build --release
64//! ```
65//! On Windows, instead, use the following commands:
66//! ```powershell
67//! git clone --depth 1 https://github.com/0xdea/haruspex
68//! cd haruspex
69//! $env:LIBCLANG_PATH="\path\to\clang+llvm\bin"
70//! $env:PATH="\path\to\ida;$env:PATH"
71//! $env:IDASDKDIR="\path\to\idasdk"
72//! $env:IDADIR="\path\to\ida" # if not set, the build script will check common locations
73//! cargo build --release
74//! ```
75//!
76//! ## Usage
77//! 1. Make sure IDA Pro is properly configured with a valid license.
78//! 2. Run as follows:
79//! ```sh
80//! haruspex <binary_file>
81//! ```
82//! 3. Find the extracted pseudo-code of each decompiled function in the `binary_file.dec` directory:
83//! ```sh
84//! vim <binary_file>.dec
85//! code <binary_file>.dec
86//! ```
87//!
88//! ## Compatibility
89//! * IDA Pro 9.0.240925 - Latest compatible: v0.1.3.
90//! * IDA Pro 9.0.241217 - Latest compatible: v0.4.2.
91//! * IDA Pro 9.1.250226 - Latest compatible: current version.
92//!
93//! *Note: check [idalib](https://github.com/binarly-io/idalib) documentation for additional information.*
94//!
95//! ## Changelog
96//! * <https://github.com/0xdea/haruspex/blob/master/CHANGELOG.md>
97//!
98//! ## TODO
99//! * Integrate with Semgrep scanning (see <https://github.com/0xdea/semgrep-rules>).
100//! * Integrate with weggli scanning (see <https://github.com/0xdea/weggli-patterns>).
101//! * Improve decompiler output in the style of [HexRaysPyTools](https://github.com/igogo-x86/HexRaysPyTools)
102//! and [abyss](https://github.com/patois/abyss).
103//! * Implement parallel analysis (see <https://github.com/fugue-re/fugue-mptp>).
104//!
105
106#![doc(html_logo_url = "https://raw.githubusercontent.com/0xdea/haruspex/master/.img/logo.png")]
107
108use std::fs;
109use std::fs::File;
110use std::io::{BufWriter, Write};
111use std::path::Path;
112use std::sync::atomic::{AtomicUsize, Ordering};
113
114use anyhow::Context;
115use idalib::IDAError;
116use idalib::decompiler::HexRaysErrorCode;
117use idalib::func::{Function, FunctionFlags};
118use idalib::idb::IDB;
119use thiserror::Error;
120
121/// Number of decompiled functions
///
/// Process-wide tally: [`run`] adds one to it after each function whose pseudo-code was
/// successfully written to disk, and reads it back to report the total.
122static COUNTER: AtomicUsize = AtomicUsize::new(0);
123
124/// Reserved characters in filenames (Unix variant)
///
/// [`run`] replaces every occurrence of these characters in a function name with `_` before the
/// name becomes part of an output filename.
// NOTE(review): `.` is presumably listed because `run` later calls `with_extension("c")`, which
// would replace everything after the last dot in the name -- confirm.
125#[cfg(unix)]
126static RESERVED_CHARS: &[char] = &['.', '/'];
/// Reserved characters in filenames (Windows variant)
///
/// Same role as the Unix list, with a longer set of characters.
127#[cfg(windows)]
128static RESERVED_CHARS: &[char] = &['.', '/', '<', '>', ':', '"', '\\', '|', '?', '*'];
129
130/// Maximum length, in characters, of the function name part of output filenames
///
/// [`run`] truncates the sanitized function name to this many characters; the `@<address>` suffix
/// and the `.c` extension are appended on top of it.
131static MAX_FILENAME_LEN: usize = 64;
132
133/// Haruspex error type
///
/// Returned by [`decompile_to_file`]. Each variant wraps the underlying error and is created
/// through the `From` conversion generated by `#[from]`, so the `?` operator can be used on both
/// [`IDAError`] and [`std::io::Error`] values. With `#[error(transparent)]` the message shown is
/// the one of the wrapped error.
134#[derive(Error, Debug)]
135pub enum HaruspexError {
136 /// Failure in decompiling the function
    ///
    /// Carries the [`IDAError`] reported when decompiling the function.
137 #[error(transparent)]
138 DecompileFailed(#[from] IDAError),
139 /// Failure in writing to the output file
    ///
    /// Carries the [`std::io::Error`] raised while creating, writing, or flushing the file.
140 #[error(transparent)]
141 FileWriteFailed(#[from] std::io::Error),
142}
143
144/// Extract pseudo-code of functions in the binary file at `filepath` and save it in `filepath.dec`.
145///
146/// ## Errors
147///
148/// Returns how many functions were decompiled, or a generic error in case something goes wrong.
149pub fn run(filepath: &Path) -> anyhow::Result<usize> {
150 // Open the target binary and run auto-analysis
151 println!("[*] Trying to analyze binary file `{}`", filepath.display());
152 let idb = IDB::open(filepath)
153 .with_context(|| format!("Failed to analyze binary file `{}`", filepath.display()))?;
154 println!("[+] Successfully analyzed binary file");
155 println!();
156
157 // Print binary file information
158 println!("[-] Processor: {}", idb.processor().long_name());
159 println!("[-] Compiler: {:?}", idb.meta().cc_id());
160 println!("[-] File type: {:?}", idb.meta().filetype());
161 println!();
162
163 // Check if Hex-Rays decompiler is available
164 if !idb.decompiler_available() {
165 return Err(anyhow::anyhow!("Decompiler is not available"));
166 }
167
168 // Create a new output directory, returning an error if it already exists, and it's not empty
169 let dirpath = filepath.with_extension("dec");
170 println!("[*] Preparing output directory `{}`", dirpath.display());
171 if dirpath.exists() {
172 fs::remove_dir(&dirpath).map_err(|_| anyhow::anyhow!("Output directory already exists"))?;
173 }
174 fs::create_dir_all(&dirpath)
175 .with_context(|| format!("Failed to create directory `{}`", dirpath.display()))?;
176 println!("[+] Output directory is ready");
177
178 // Extract pseudo-code of functions
179 println!();
180 println!("[*] Extracting pseudo-code of functions...");
181 println!();
182 for (_id, f) in idb.functions() {
183 // Skip the function if it has the `thunk` attribute
184 if f.flags().contains(FunctionFlags::THUNK) {
185 continue;
186 }
187
188 // Decompile function and write pseudo-code to the output file
189 let func_name = f.name().unwrap_or_else(|| "<no name>".into());
190 let output_file = format!(
191 "{}@{:X}",
192 func_name
193 .replace(RESERVED_CHARS, "_")
194 .chars()
195 .take(MAX_FILENAME_LEN)
196 .collect::<String>(),
197 f.start_address()
198 );
199 let output_path = dirpath.join(output_file).with_extension("c");
200
201 match decompile_to_file(&idb, &f, &output_path) {
202 // Print the output path in case of successful function decompilation
203 Ok(()) => println!("{func_name} -> `{}`", output_path.display()),
204
205 // Return an error if Hex-Rays decompiler license is not available
206 Err(HaruspexError::DecompileFailed(IDAError::HexRays(e)))
207 if e.code() == HexRaysErrorCode::License =>
208 {
209 return Err(e.into());
210 }
211
212 // Ignore other IDA errors
213 Err(HaruspexError::DecompileFailed(_)) => continue,
214
215 // Return any other error
216 Err(e) => return Err(e.into()),
217 }
218
219 COUNTER.fetch_add(1, Ordering::Relaxed);
220 }
221
222 // Remove the output directory and return an error in case no functions were decompiled
223 if COUNTER.load(Ordering::Relaxed) == 0 {
224 fs::remove_dir(&dirpath)
225 .with_context(|| format!("Failed to remove directory `{}`", dirpath.display()))?;
226 return Err(anyhow::anyhow!(
227 "No functions were decompiled, check your input file"
228 ));
229 }
230
231 println!();
232 println!(
233 "[+] Decompiled {COUNTER:?} functions into `{}`",
234 dirpath.display()
235 );
236 println!("[+] Done processing binary file `{}`", filepath.display());
237 Ok(COUNTER.load(Ordering::Relaxed))
238}
239
240/// Decompile [`Function`] `func` in [`IDB`] `idb` and save its pseudo-code to the output file at `filepath`.
241///
242/// ## Errors
243///
244/// Returns the appropriate [`HaruspexError`] in case something goes wrong.
245///
246/// ## Examples
247///
248/// Basic usage:
249/// ```
250/// # fn main() -> anyhow::Result<()> {
251/// # let base_dir = std::path::Path::new("./tests/data");
252/// let input_file = base_dir.join("ls");
253/// let output_file = base_dir.join("ls-main.c");
254///
255/// let idb = idalib::idb::IDB::open(&input_file)?;
256/// let (_, func) = idb
257/// .functions()
258/// .find(|(_, f)| f.name().unwrap() == "main")
259/// .unwrap();
260///
261/// haruspex::decompile_to_file(&idb, &func, &output_file)?;
262/// # std::fs::remove_file(output_file)?;
263/// # Ok(())
264/// # }
265/// ```
266///
267pub fn decompile_to_file(
268 idb: &IDB,
269 func: &Function,
270 filepath: impl AsRef<Path>,
271) -> Result<(), HaruspexError> {
272 // Decompile function
273 let decomp = idb.decompile(func)?;
274 let source = decomp.pseudocode();
275
276 // Write pseudo-code to output file
277 // Note: for easier testing, we could use a generic function together with `std::io::Cursor`
278 let mut writer = BufWriter::new(File::create(&filepath)?);
279 writer.write_all(source.as_bytes())?;
280 writer.flush()?;
281
282 Ok(())
283}