From aacfcaaa239db18b5c68986163bf38f6823fb18b Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 3 Mar 2023 09:52:50 -0700 Subject: [PATCH 01/12] explorer: improve embedded PE detection --- capa/features/extractors/ida/file.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 0d380a88..532d5a89 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -21,12 +21,14 @@ from capa.features.file import Export, Import, Section, FunctionName from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress +MAX_OFFSET_PE_AFTER_MZ = 0x200 + def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: """check segment for embedded PE adapted for IDA from: - https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19 + https://github.com/vivisect/vivisect/blob/91e8419a861f49779f18316f155311967e696836/PE/carve.py#L25 """ seg_max = seg.end_ea mz_xor = [ @@ -40,13 +42,14 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: todo = [] for mzx, pex, i in mz_xor: + # find all segment offsets containing XOR'd "MZ" bytes for off in capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx): todo.append((off, mzx, pex, i)) while len(todo): off, mzx, pex, i = todo.pop() - # The MZ header has one field we will check e_lfanew is at 0x3c + # MZ header has one field we will check e_lfanew is at 0x3c e_lfanew = off + 0x3C if seg_max < (e_lfanew + 4): @@ -54,6 +57,10 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0] + # assume XOR'd "PE" bytes exist within threshold + if newoff > MAX_OFFSET_PE_AFTER_MZ: + continue + peoff = off + newoff if seg_max < (peoff + 2): continue @@ -61,9 +68,6 @@ def
check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: if idc.get_bytes(peoff, 2) == pex: yield off, i - for nextres in capa.features.extractors.ida.helpers.find_byte_sequence(off + 1, seg.end_ea, mzx): - todo.append((nextres, mzx, pex, i)) - def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: """extract embedded PE features From 14c18727db75e6b04dfb8b588f331173ed31ef63 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 3 Mar 2023 09:55:45 -0700 Subject: [PATCH 02/12] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 543db9ac..52f6de8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff - explorer: fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6 +- explorer: improve embedded PE detection #1344 @mike-hunhoff ### capa explorer IDA Pro plugin From 02dc42154bf97b71ae730e49149e95ed51bfdaf0 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 6 Mar 2023 08:53:57 -0700 Subject: [PATCH 03/12] Update CHANGELOG.md Co-authored-by: Willi Ballenthin --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52f6de8e..439b0827 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff - explorer: fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6 -- explorer: improve embedded PE detection #1344 @mike-hunhoff +- explorer: improve embedded PE detection performance and reduce FP potential #1344 @mike-hunhoff ### capa explorer IDA Pro plugin From 
95f23dafe57930ffef8f8df1aed4dacce4389bc8 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 6 Mar 2023 08:55:32 -0700 Subject: [PATCH 04/12] Update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 439b0827..ac3ba707 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,11 +25,11 @@ ### Bug Fixes - extractor: fix vivisect loop detection corner case #1310 @mr-tz - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff -- explorer: fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6 -- explorer: improve embedded PE detection performance and reduce FP potential #1344 @mike-hunhoff ### capa explorer IDA Pro plugin +- fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff +- improve embedded PE detection performance and reduce FP potential #1344 @mike-hunhoff ### Development From 51286380717dd60be9e2ae1d89b8f8ebd8ccf12c Mon Sep 17 00:00:00 2001 From: manasghandat <95558940+manasghandat@users.noreply.github.com> Date: Thu, 9 Mar 2023 11:58:47 +0530 Subject: [PATCH 05/12] code style: update lint.py (#1352) * code style: update lint.py --- scripts/lint.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/scripts/lint.py b/scripts/lint.py index c049dd6e..cf56f1a8 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -648,11 +648,11 @@ class FormatStringQuotesIncorrect(Lint): continue if value.style is None: # no quotes - self.recommendation = 'add double quotes to "%s"' % value.value + self.recommendation = f'add double quotes to "{value.value}"' return True if value.style == "'": # single quote - self.recommendation = 'change single quotes to double quotes for "%s"' % value.value + self.recommendation = f'change single quotes to double quotes for 
"{value.value}"' return True elif isinstance(key, ruamel.yaml.ScalarEvent) and key.value == "substring": @@ -662,11 +662,11 @@ class FormatStringQuotesIncorrect(Lint): continue if value.style is None: # no quotes - self.recommendation = 'add double quotes to "%s"' % value.value + self.recommendation = f'add double quotes to "{value.value}"' return True if value.style == "'": # single quote - self.recommendation = 'change single quotes to double quotes for "%s"' % value.value + self.recommendation = f'change single quotes to double quotes for "{value.value}"' return True else: @@ -816,25 +816,12 @@ def lint_rule(ctx: Context, rule: Rule): # and ends up just producing a lot of noise. if not (is_nursery_rule(rule) and len(violations) == 1 and violations[0].name == "missing examples"): print("") - print( - "%s%s" - % ( - " (nursery) " if is_nursery_rule(rule) else "", - rule.name, - ) - ) + print(f'{" (nursery) " if is_nursery_rule(rule) else ""} {rule.name}') for violation in violations: print( - "%s %s: %s: %s" - % ( - " " if is_nursery_rule(rule) else "", - Lint.WARN if is_nursery_rule(rule) else violation.level, - violation.name, - violation.recommendation, - ) + f"{' ' if is_nursery_rule(rule) else ''} {Lint.WARN if is_nursery_rule(rule) else violation.level}: {violation.name}: {violation.recommendation}" ) - print("") if is_nursery_rule(rule): @@ -860,8 +847,8 @@ def lint_rule(ctx: Context, rule: Rule): if (not lints_failed) and (not lints_warned) and has_examples: print("") - print("%s%s" % (" (nursery) ", rule.name)) - print("%s %s: %s: %s" % (" ", Lint.WARN, green("no lint failures"), "Graduate the rule")) + print(f'{" (nursery) " if is_nursery_rule(rule) else ""} {rule.name}') + print(f" {Lint.WARN}: {green('no lint failures')}: Graduate the rule") print("") else: lints_failed = len(tuple(filter(lambda v: v.level == Lint.FAIL, violations))) @@ -921,7 +908,7 @@ def lint(ctx: Context): with redirecting_print_to_tqdm(): for rule in pbar: name = rule.name - 
pbar.set_description(width("linting rule: %s" % (name), 48)) + pbar.set_description(width(f"linting rule: {name}", 48)) ret[name] = lint_rule(ctx, rule) return ret From d8f89d49d47ec7a0af7db83a2d0f29588e100c70 Mon Sep 17 00:00:00 2001 From: AG <98327736+ggold7046@users.noreply.github.com> Date: Fri, 10 Mar 2023 01:17:59 +0530 Subject: [PATCH 06/12] Update import-to-bn.py Used f string for enhanced readability. --- scripts/import-to-bn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/import-to-bn.py b/scripts/import-to-bn.py index b9b48cd8..d157af44 100644 --- a/scripts/import-to-bn.py +++ b/scripts/import-to-bn.py @@ -93,9 +93,9 @@ def load_analysis(bv): rows = sorted(rows) for ns, name, va in rows: if ns: - cmt = "%s (%s)" % (name, ns) + cmt = f"{name} ({ns})" else: - cmt = "%s" % (name,) + cmt = f"{name}" binaryninja.log_info("0x%x: %s" % (va, cmt)) try: From 50935372cab82f0184d9142ffe243360c56fff29 Mon Sep 17 00:00:00 2001 From: AG <98327736+ggold7046@users.noreply.github.com> Date: Fri, 10 Mar 2023 01:36:17 +0530 Subject: [PATCH 07/12] Update import-to-ida.py Updated with f string for enhanced readability. --- scripts/import-to-ida.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/import-to-ida.py b/scripts/import-to-ida.py index e5d4b5ad..a32b45b9 100644 --- a/scripts/import-to-ida.py +++ b/scripts/import-to-ida.py @@ -101,9 +101,9 @@ def main(): rows = sorted(rows) for ns, name, va in rows: if ns: - cmt = "%s (%s)" % (name, ns) + cmt = f"{name} ({ns})" else: - cmt = "%s" % (name,) + cmt = f"{name}" logger.info("0x%x: %s", va, cmt) try: From eaeef59583f38532e91b2b0cb71ce5bb0b6184d8 Mon Sep 17 00:00:00 2001 From: AG <98327736+ggold7046@users.noreply.github.com> Date: Fri, 10 Mar 2023 13:03:04 +0530 Subject: [PATCH 08/12] Update insn.py Updated with f strings for enhanced readability. 
--- capa/features/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/insn.py b/capa/features/insn.py index 1f1c0171..030784fe 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -31,7 +31,7 @@ class _AccessFeature(Feature, abc.ABC): super().__init__(value, description=description) if access is not None: if access not in VALID_FEATURE_ACCESS: - raise ValueError("%s access type %s not valid" % (self.name, access)) + raise ValueError(f"{self.name} access type {access} not valid") self.access = access def __hash__(self): From df6de3446c14b1a2231741daf76f08124b867aa2 Mon Sep 17 00:00:00 2001 From: AG <98327736+ggold7046@users.noreply.github.com> Date: Fri, 10 Mar 2023 13:10:02 +0530 Subject: [PATCH 09/12] Update file.py Updated with f string for enhanced readability. --- capa/features/extractors/ida/file.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 532d5a89..a3da4c6a 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -106,13 +106,13 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]): yield Import(name), addr dll = info[0] - symbol = "#%d" % (info[2]) + symbol = f"#{info[2]}" elif info[1]: dll = info[0] symbol = info[1] elif info[2]: dll = info[0] - symbol = "#%d" % (info[2]) + symbol = f"#{info[2]}" else: continue @@ -180,7 +180,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]: # no file type to return when processing a binary file, but we want to continue processing return else: - raise NotImplementedError("unexpected file format: %d" % file_info.filetype) + raise NotImplementedError(f"unexpected file format: {file_info.filetype}") def extract_features() -> Iterator[Tuple[Feature, Address]]: From 7031c68a85187881f307954cbca8d5e20d9e31c9 Mon Sep 
17 00:00:00 2001 From: linpeiyu164 Date: Sat, 11 Mar 2023 00:07:24 +0800 Subject: [PATCH 10/12] fix wrong indentation level for args.backend --- capa/main.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/capa/main.py b/capa/main.py index ba03c7a4..617df71f 100644 --- a/capa/main.py +++ b/capa/main.py @@ -853,15 +853,15 @@ def install_common_args(parser, wanted=None): help="select sample format, %s" % format_help, ) - if "backend" in wanted: - parser.add_argument( - "-b", - "--backend", - type=str, - help="select the backend to use", - choices=(BACKEND_VIV,), - default=BACKEND_VIV, - ) + if "backend" in wanted: + parser.add_argument( + "-b", + "--backend", + type=str, + help="select the backend to use", + choices=(BACKEND_VIV,), + default=BACKEND_VIV, + ) if "rules" in wanted: parser.add_argument( From 02e451a2b19e564b03a235af92bc32d3792a5c55 Mon Sep 17 00:00:00 2001 From: AG <98327736+ggold7046@users.noreply.github.com> Date: Sat, 11 Mar 2023 12:29:59 +0530 Subject: [PATCH 11/12] Update profile-memory.py Updated with f string for enhanced readability. 
--- scripts/profile-memory.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/profile-memory.py b/scripts/profile-memory.py index 349f6ee9..c6e2df90 100644 --- a/scripts/profile-memory.py +++ b/scripts/profile-memory.py @@ -16,10 +16,10 @@ def display_top(snapshot, key_type="lineno", limit=10): ) top_stats = snapshot.statistics(key_type) - print("Top %s lines" % limit) + print(f"Top {limit} lines") for index, stat in enumerate(top_stats[:limit], 1): frame = stat.traceback[0] - print("#%s: %s:%s: %.1f KiB" % (index, frame.filename, frame.lineno, stat.size / 1024)) + print(f"#{index}: {frame.filename}:{frame.lineno}: {stat.size / 1024:.1f} KiB") line = linecache.getline(frame.filename, frame.lineno).strip() if line: print(" %s" % line) @@ -27,9 +27,9 @@ def display_top(snapshot, key_type="lineno", limit=10): other = top_stats[limit:] if other: size = sum(stat.size for stat in other) - print("%s other: %.1f KiB" % (len(other), size / 1024)) + print(f"{len(other)} other: {size / 1024:.1f} KiB") total = sum(stat.size for stat in top_stats) - print("Total allocated size: %.1f KiB" % (total / 1024)) + print(f"Total allocated size: {total / 1024:.1f} KiB") def main(): @@ -45,11 +45,11 @@ def main(): import capa.main count = int(os.environ.get("CAPA_PROFILE_COUNT", 1)) - print("total iterations planned: %d (set via env var CAPA_PROFILE_COUNT)." % (count)) + print(f"total iterations planned: {count} (set via env var CAPA_PROFILE_COUNT).") print() for i in range(count): - print("iteration %d/%d..." 
% (i + 1, count)) + print(f"iteration {i + 1}/{count}...") with contextlib.redirect_stdout(io.StringIO()): with contextlib.redirect_stderr(io.StringIO()): t0 = time.time() @@ -59,9 +59,9 @@ def main(): gc.collect() process = psutil.Process(os.getpid()) - print(" duration: %0.02fs" % (t1 - t0)) - print(" rss: %.1f MiB" % (process.memory_info().rss / 1024 / 1024)) - print(" vms: %.1f MiB" % (process.memory_info().vms / 1024 / 1024)) + print(f" duration: {t1 - t0:.02f}s") + print(f" rss: {process.memory_info().rss / 1024 / 1024:.1f} MiB") + print(f" vms: {process.memory_info().vms / 1024 / 1024:.1f} MiB") print("done.") gc.collect() From 6321adc41196c402a61637577082919864f587a2 Mon Sep 17 00:00:00 2001 From: AG <98327736+ggold7046@users.noreply.github.com> Date: Sat, 11 Mar 2023 12:43:22 +0530 Subject: [PATCH 12/12] Update match-function-id.py Updated with f string for enhanced readability. --- scripts/match-function-id.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/match-function-id.py b/scripts/match-function-id.py index e7b1ea38..0daa88bb 100644 --- a/scripts/match-function-id.py +++ b/scripts/match-function-id.py @@ -125,7 +125,7 @@ def main(argv=None): for analyzer in analyzers: name = viv_utils.flirt.match_function_flirt_signatures(analyzer.matcher, vw, function) if name: - print("0x%04x: %s" % (function, name)) + print(f"0x{function:04x}: {name}") return 0