ida extractor: extract APIs from renamed globals

Add support to extract dynamically resolved APIs stored in global
variables that have been renamed (for example using the `renimp.idc`
script included with IDA Pro).
This commit is contained in:
Ana Maria Martinez Gomez
2024-07-09 16:16:49 +02:00
parent 40c7714c48
commit be8499238c
5 changed files with 38 additions and 10 deletions

View File

@@ -6,6 +6,7 @@
- webui: explore capa analysis results in a web-based UI online and offline #2224 @s-ff
- support analyzing DRAKVUF traces #2143 @yelhamer
- IDA extractor: extract names from dynamically resolved APIs stored in renamed global variables #2201 @Ana06
### Breaking Changes

View File

@@ -269,6 +269,7 @@ Please learn to write rules and contribute new entries as you find interesting t
# IDA Pro plugin: capa explorer
If you use IDA Pro, then you can use the [capa explorer](https://github.com/mandiant/capa/tree/master/capa/ida/plugin) plugin.
capa explorer helps you identify interesting areas of a program and build new capa rules using features extracted directly from your IDA Pro database.
It also uses your local changes to the .idb to extract better features, such as when you rename a global variable that contains a dynamically resolved API address.
![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png)

View File

@@ -5,9 +5,11 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Any, Dict, Tuple, Iterator
import re
from typing import Any, Dict, Tuple, Iterator, Optional
import idc
import ida_ua
import idaapi
import idautils
@@ -35,9 +37,9 @@ def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
return ctx["externs_cache"]
def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[Any]:
def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[Tuple[str, str]]:
"""check instruction for API call"""
info = ()
info = None
ref = insn.ea
# attempt to resolve API calls by following chained thunks to a reasonable depth
@@ -52,7 +54,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[A
except IndexError:
break
info = funcs.get(ref, ())
info = funcs.get(ref)
if info:
break
@@ -60,8 +62,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[A
if not f or not (f.flags & idaapi.FUNC_THUNK):
break
if info:
yield info
return info
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
@@ -76,16 +77,39 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
if insn.get_canon_mnem() not in ("call", "jmp"):
return
# check calls to imported functions
for api in check_for_api_call(insn, get_imports(fh.ctx)):
# check call to imported functions
api = check_for_api_call(insn, get_imports(fh.ctx))
if api:
# tuple (<module>, <function>, <ordinal>)
for name in capa.features.extractors.helpers.generate_symbols(api[0], api[1]):
yield API(name), ih.address
# a call instruction should only call one function, stop if a call to an import is extracted
return
# check calls to extern functions
for api in check_for_api_call(insn, get_externs(fh.ctx)):
# check call to extern functions
api = check_for_api_call(insn, get_externs(fh.ctx))
if api:
# tuple (<module>, <function>, <ordinal>)
yield API(api[1]), ih.address
# a call instruction should only call one function, stop if a call to an extern is extracted
return
# extract dynamically resolved APIs stored in renamed globals (renamed for example using `renimp.idc`)
# examples: `CreateProcessA`, `HttpSendRequestA`
if insn.Op1.type == ida_ua.o_mem:
op_addr = insn.Op1.addr
op_name = idaapi.get_name(op_addr)
# when renaming a global using an API name, IDA assigns it the function type
# ensure we do not extract something wrong by checking that the address has a name and a type
# we could check that the type is a function definition, but that complicates the code
if (not op_name.startswith("off_")) and idc.get_type(op_addr):
# Remove suffix used in repeated names, for example _0 in VirtualFree_0
match = re.match(r"(.+)_\d+", op_name)
if match:
op_name = match.group(1)
# the global name does not include the DLL name, so we can't extract it
for name in capa.features.extractors.helpers.generate_symbols("", op_name):
yield API(name), ih.address
# extract IDA/FLIRT recognized API functions
targets = tuple(idautils.CodeRefsFrom(insn.ea, False))

View File

@@ -81,6 +81,7 @@ can update using the `Settings` button.
* Double-click the `Address` column to navigate your Disassembly view to the address of the associated feature
* Double-click a result in the `Rule Information` column to expand its children
* Select a checkbox in the `Rule Information` column to highlight the address of the associated feature in your Disassembly view
* Reanalyze if you renamed global variables that store dynamically resolved APIs. capa will use these to improve its analysis.
#### Tips for Rule Generator

View File

@@ -188,6 +188,7 @@ known_first_party = [
"ida_loader",
"ida_nalt",
"ida_segment",
"ida_ua",
"idaapi",
"idautils",
"idc",