haruspex/lib.rs
1//!
2//! haruspex - Tool to extract IDA decompiler's pseudocode
3//! Copyright (c) 2024-2025 Marco Ivaldi <raptor@0xdeadbeef.info>
4//!
5//! > "Hacking is the discipline of questioning all your assumptions all of the time."
6//! >
7//! > -- Dave Aitel
8//!
9//! Haruspex is a blazing fast IDA Pro headless plugin that extracts pseudocode generated by IDA Pro's
10//! decompiler in a format that should be suitable to be imported into an IDE, or parsed by static
11//! analysis tools such as [Semgrep](https://semgrep.dev/), [weggli](https://github.com/weggli-rs/weggli),
12//! or [oneiromancer](https://crates.io/crates/oneiromancer).
13//!
14//! ## Features
15//! * Blazing fast, headless user experience courtesy of IDA Pro 9.x and Binarly's idalib Rust bindings.
16//! * Support for binary targets for any architecture implemented by IDA Pro's Hex-Rays decompiler.
//! * Pseudocode of each function is stored in a separate file in the output directory for easy inspection.
18//! * External crates can invoke [`decompile_to_file`] to decompile a function and save its pseudocode to disk.
19//!
20//! ## Blog posts
21//! * <https://hex-rays.com/blog/streamlining-vulnerability-research-idalib-rust-bindings>
22//! * <https://hnsecurity.it/blog/streamlining-vulnerability-research-with-ida-pro-and-rust>
23//!
24//! ## See also
25//! * <https://github.com/0xdea/ghidra-scripts/blob/main/Haruspex.java>
26//! * <https://github.com/0xdea/semgrep-rules>
27//! * <https://github.com/0xdea/weggli-patterns>
28//! * <https://docs.hex-rays.com/release-notes/9_0#headless-processing-with-idalib>
29//! * <https://github.com/binarly-io/idalib>
30//! * <https://github.com/xorpse/parascope>
31//! * <https://hnsecurity.it/blog/automating-binary-vulnerability-discovery-with-ghidra-and-semgrep>
32//!
33//! ## Installing
34//! The easiest way to get the latest release is via [crates.io](https://crates.io/crates/haruspex):
35//! 1. Download, install, and configure IDA Pro (see <https://hex-rays.com/ida-pro>).
36//! 2. Install LLVM/Clang (see <https://rust-lang.github.io/rust-bindgen/requirements.html>).
37//! 3. On Linux/macOS, install as follows:
38//! ```sh
39//! export IDADIR=/path/to/ida # if not set, the build script will check common locations
40//! cargo install haruspex
41//! ```
42//! On Windows, instead, use the following commands:
43//! ```powershell
44//! $env:LIBCLANG_PATH="\path\to\clang+llvm\bin"
45//! $env:PATH="\path\to\ida;$env:PATH"
46//! $env:IDADIR="\path\to\ida" # if not set, the build script will check common locations
47//! cargo install haruspex
48//! ```
49//!
50//! ## Compiling
51//! Alternatively, you can build from [source](https://github.com/0xdea/haruspex):
52//! 1. Download, install, and configure IDA Pro (see <https://hex-rays.com/ida-pro>).
53//! 2. Install LLVM/Clang (see <https://rust-lang.github.io/rust-bindgen/requirements.html>).
54//! 3. On Linux/macOS, compile as follows:
55//! ```sh
56//! git clone --depth 1 https://github.com/0xdea/haruspex
57//! cd haruspex
58//! export IDADIR=/path/to/ida # if not set, the build script will check common locations
59//! cargo build --release
60//! ```
61//! On Windows, instead, use the following commands:
62//! ```powershell
63//! git clone --depth 1 https://github.com/0xdea/haruspex
64//! cd haruspex
65//! $env:LIBCLANG_PATH="\path\to\clang+llvm\bin"
66//! $env:PATH="\path\to\ida;$env:PATH"
67//! $env:IDADIR="\path\to\ida" # if not set, the build script will check common locations
68//! cargo build --release
69//! ```
70//!
71//! ## Usage
72//! 1. Make sure IDA Pro is properly configured with a valid license.
73//! 2. Run as follows:
74//! ```sh
75//! haruspex <binary_file>
76//! ```
77//! 3. Find the extracted pseudocode of each decompiled function in the `binary_file.dec` directory:
78//! ```sh
79//! vim <binary_file>.dec
80//! code <binary_file>.dec
81//! ```
82//!
83//! ## Compatibility
84//! * IDA Pro 9.0.240925 - Latest compatible: v0.2.4.
85//! * IDA Pro 9.0.241217 - Latest compatible: v0.3.5.
86//! * IDA Pro 9.1.250226 - Latest compatible: v0.6.2.
87//! * IDA Pro 9.2.250908 - Latest compatible: current version.
88//!
89//! *Note: check [idalib](https://github.com/binarly-io/idalib) documentation for additional information.*
90//!
91//! ## Changelog
92//! * <https://github.com/0xdea/haruspex/blob/master/CHANGELOG.md>
93//!
94//! ## TODO
95//! * Integrate with Semgrep scanning (see <https://github.com/0xdea/semgrep-rules>).
96//! * Integrate with weggli scanning (see <https://github.com/0xdea/weggli-patterns>).
97//! * Improve decompiler output in the style of [HexRaysPyTools](https://github.com/igogo-x86/HexRaysPyTools)
98//! and [abyss](https://github.com/patois/abyss).
99//! * Implement parallel analysis (see <https://github.com/fugue-re/fugue-mptp>).
100//!
101
102#![doc(html_logo_url = "https://raw.githubusercontent.com/0xdea/haruspex/master/.img/logo.png")]
103
104use std::fs;
105use std::fs::File;
106use std::io::{BufWriter, Write};
107use std::path::Path;
108use std::sync::atomic::{AtomicUsize, Ordering};
109
110use anyhow::Context;
111use idalib::IDAError;
112use idalib::decompiler::HexRaysErrorCode;
113use idalib::func::{Function, FunctionFlags};
114use idalib::idb::IDB;
115use thiserror::Error;
116
/// Number of decompiled functions
///
/// Incremented by [`run`] once for every function whose pseudocode was successfully written to
/// disk. Functions that are skipped (thunks) or that fail to decompile are not counted.
static COUNTER: AtomicUsize = AtomicUsize::new(0);
119
/// Reserved characters in filenames (Unix)
///
/// Every occurrence in a function name is replaced with `_` before the name is used to build the
/// output filename. `.` is included because the output path is later completed with
/// `Path::with_extension`, which would otherwise treat the text after the last dot as an
/// extension and replace it.
#[cfg(unix)]
static RESERVED_CHARS: &[char] = &['.', '/'];
/// Reserved characters in filenames (Windows)
///
/// Same role as the Unix list, extended with the additional characters listed here for Windows
/// targets.
#[cfg(windows)]
static RESERVED_CHARS: &[char] = &['.', '/', '<', '>', ':', '"', '\\', '|', '?', '*'];
125
/// Maximum length of filenames
///
/// More precisely, the maximum number of characters of the (sanitized) function name that are
/// kept in the output filename; the `@<ADDRESS>` suffix and the `.c` extension are added on top.
static MAX_FILENAME_LEN: usize = 64;
128
/// Haruspex error type
///
/// Both variants are transparent wrappers: their `Display` output and error source are forwarded
/// to the wrapped error, and the `#[from]` attributes let the `?` operator convert the wrapped
/// error types into a `HaruspexError` automatically.
#[derive(Error, Debug)]
pub enum HaruspexError {
    /// Failure in decompiling the function
    #[error(transparent)]
    DecompileFailed(#[from] IDAError),
    /// Failure in writing to the output file (this includes failing to create the file)
    #[error(transparent)]
    FileWriteFailed(#[from] std::io::Error),
}
139
140/// Extract pseudocode of functions in the binary file at `filepath` and save it in `filepath.dec`.
141///
142/// ## Errors
143///
144/// Returns how many functions were decompiled, or a generic error in case something goes wrong.
145pub fn run(filepath: &Path) -> anyhow::Result<usize> {
146 // Open the target binary and run auto-analysis
147 println!("[*] Analyzing binary file `{}`", filepath.display());
148 let idb = IDB::open(filepath)
149 .with_context(|| format!("Failed to analyze binary file `{}`", filepath.display()))?;
150 println!("[+] Successfully analyzed binary file");
151 println!();
152
153 // Print binary file information
154 println!("[-] Processor: {}", idb.processor().long_name());
155 println!("[-] Compiler: {:?}", idb.meta().cc_id());
156 println!("[-] File type: {:?}", idb.meta().filetype());
157 println!();
158
159 // Check if Hex-Rays decompiler is available
160 if !idb.decompiler_available() {
161 return Err(anyhow::anyhow!("Decompiler is not available"));
162 }
163
164 // Create a new output directory, returning an error if it already exists, and it's not empty
165 let dirpath = filepath.with_extension("dec");
166 println!("[*] Preparing output directory `{}`", dirpath.display());
167 if dirpath.exists() {
168 fs::remove_dir(&dirpath).map_err(|_| anyhow::anyhow!("Output directory already exists"))?;
169 }
170 fs::create_dir_all(&dirpath)
171 .with_context(|| format!("Failed to create directory `{}`", dirpath.display()))?;
172 println!("[+] Output directory is ready");
173
174 // Extract pseudocode of functions
175 println!();
176 println!("[*] Extracting pseudocode of functions...");
177 println!();
178 for (_id, f) in idb.functions() {
179 // Skip the function if it has the `thunk` attribute
180 if f.flags().contains(FunctionFlags::THUNK) {
181 continue;
182 }
183
184 // Decompile function and write pseudocode to the output file
185 let func_name = f.name().unwrap_or_else(|| "<no name>".into());
186 let output_file = format!(
187 "{}@{:X}",
188 func_name
189 .replace(RESERVED_CHARS, "_")
190 .chars()
191 .take(MAX_FILENAME_LEN)
192 .collect::<String>(),
193 f.start_address()
194 );
195 let output_path = dirpath.join(output_file).with_extension("c");
196
197 match decompile_to_file(&idb, &f, &output_path) {
198 // Print the output path in case of successful function decompilation
199 Ok(()) => println!("{func_name} -> `{}`", output_path.display()),
200
201 // Return an error if Hex-Rays decompiler license is not available
202 Err(HaruspexError::DecompileFailed(IDAError::HexRays(e)))
203 if e.code() == HexRaysErrorCode::License =>
204 {
205 return Err(e.into());
206 }
207
208 // Ignore other IDA errors
209 Err(HaruspexError::DecompileFailed(_)) => continue,
210
211 // Return any other error
212 Err(e) => return Err(e.into()),
213 }
214
215 COUNTER.fetch_add(1, Ordering::Relaxed);
216 }
217
218 // Remove the output directory and return an error in case no functions were decompiled
219 if COUNTER.load(Ordering::Relaxed) == 0 {
220 fs::remove_dir(&dirpath)
221 .with_context(|| format!("Failed to remove directory `{}`", dirpath.display()))?;
222 return Err(anyhow::anyhow!(
223 "No functions were decompiled, check your input file"
224 ));
225 }
226
227 println!();
228 println!(
229 "[+] Decompiled {COUNTER:?} functions into `{}`",
230 dirpath.display()
231 );
232 println!("[+] Done processing binary file `{}`", filepath.display());
233 Ok(COUNTER.load(Ordering::Relaxed))
234}
235
236/// Decompile [`Function`] `func` in [`IDB`] `idb` and save its pseudocode to the output file at `filepath`.
237///
238/// ## Errors
239///
240/// Returns the appropriate [`HaruspexError`] in case something goes wrong.
241///
242/// ## Examples
243///
244/// Basic usage:
245/// ```
246/// # fn main() -> anyhow::Result<()> {
247/// # let base_dir = std::path::Path::new("./tests/data");
248/// let input_file = base_dir.join("ls");
249/// let output_file = base_dir.join("ls-main.c");
250///
251/// let idb = idalib::idb::IDB::open(&input_file)?;
252/// let (_, func) = idb
253/// .functions()
254/// .find(|(_, f)| f.name().unwrap() == "main")
255/// .unwrap();
256///
257/// haruspex::decompile_to_file(&idb, &func, &output_file)?;
258/// # std::fs::remove_file(output_file)?;
259/// # Ok(())
260/// # }
261/// ```
262///
263pub fn decompile_to_file(
264 idb: &IDB,
265 func: &Function,
266 filepath: impl AsRef<Path>,
267) -> Result<(), HaruspexError> {
268 // Decompile function
269 let decomp = idb.decompile(func)?;
270 let source = decomp.pseudocode();
271
272 // Write pseudocode to output file
273 // Note: for easier testing, we could use a generic function together with `std::io::Cursor`
274 let mut writer = BufWriter::new(File::create(&filepath)?);
275 writer.write_all(source.as_bytes())?;
276 writer.flush()?;
277
278 Ok(())
279}