From 29dffffe1b844ddc9b978493922d624081c0a309 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Mon, 8 Feb 2021 19:30:54 +0100 Subject: [PATCH 01/14] Python3 support for vivisect Vivisect has moved to Python3. Allow to run vivisect with Python3 in capa. I am using the following version of vivisect (which includes fixes for some bugs I have found and some open PRs in vivisect): https://github.com/Ana06/vivisect/tree/py-3 --- capa/features/extractors/viv/__init__.py | 6 +--- capa/features/extractors/viv/basicblock.py | 9 ++++-- capa/main.py | 36 +++++++++++++++------- setup.py | 2 ++ 4 files changed, 35 insertions(+), 18 deletions(-) diff --git a/capa/features/extractors/viv/__init__.py b/capa/features/extractors/viv/__init__.py index c6511215..47581236 100644 --- a/capa/features/extractors/viv/__init__.py +++ b/capa/features/extractors/viv/__init__.py @@ -8,11 +8,7 @@ import types -import file -import insn -import function import viv_utils -import basicblock import capa.features.extractors import capa.features.extractors.viv.file @@ -42,7 +38,7 @@ def add_va_int_cast(o): this bit of skullduggery lets use cast viv-utils objects as ints. the correct way of doing this is to update viv-utils (or subclass the objects here). 
""" - setattr(o, "__int__", types.MethodType(get_va, o, type(o))) + setattr(o, "__int__", types.MethodType(get_va, o)) return o diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py index 69ae94f6..76ffefe6 100644 --- a/capa/features/extractors/viv/basicblock.py +++ b/capa/features/extractors/viv/basicblock.py @@ -125,11 +125,16 @@ def get_printable_len(oper): def is_printable_ascii(chars): - return all(ord(c) < 127 and c in string.printable for c in chars) + try: + chars_str = chars.decode("ascii") + except UnicodeDecodeError: + return False + else: + return all(c in string.printable for c in chars_str) def is_printable_utf16le(chars): - if all(c == "\x00" for c in chars[1::2]): + if all(c == b"\x00" for c in chars[1::2]): return is_printable_ascii(chars[::2]) diff --git a/capa/main.py b/capa/main.py index 27d5a9c3..2bb14ade 100644 --- a/capa/main.py +++ b/capa/main.py @@ -32,7 +32,7 @@ import capa.features.extractors from capa.helpers import oint, get_file_taste RULES_PATH_DEFAULT_STRING = "(embedded rules)" -SUPPORTED_FILE_MAGIC = set(["MZ"]) +SUPPORTED_FILE_MAGIC = set([b"MZ"]) logger = logging.getLogger("capa") @@ -304,19 +304,33 @@ class UnsupportedRuntimeError(RuntimeError): def get_extractor_py3(path, format, disable_progress=False): - from smda.SmdaConfig import SmdaConfig - from smda.Disassembler import Disassembler + if False: + from smda.SmdaConfig import SmdaConfig + from smda.Disassembler import Disassembler - import capa.features.extractors.smda + import capa.features.extractors.smda - smda_report = None - with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): - config = SmdaConfig() - config.STORE_BUFFER = True - smda_disasm = Disassembler(config) - smda_report = smda_disasm.disassembleFile(path) + smda_report = None + with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): + config = 
SmdaConfig() + config.STORE_BUFFER = True + smda_disasm = Disassembler(config) + smda_report = smda_disasm.disassembleFile(path) - return capa.features.extractors.smda.SmdaFeatureExtractor(smda_report, path) + return capa.features.extractors.smda.SmdaFeatureExtractor(smda_report, path) + else: + import capa.features.extractors.viv + + with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): + vw = get_workspace(path, format, should_save=False) + + try: + vw.saveWorkspace() + except IOError: + # see #168 for discussion around how to handle non-writable directories + logger.info("source directory is not writable, won't save intermediate workspace") + + return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path) def get_extractor(path, format, disable_progress=False): diff --git a/setup.py b/setup.py index 4caaa749..c739f1b7 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,8 @@ if sys.version_info >= (3, 0): # py3 requirements.append("halo") requirements.append("networkx") + requirements.append("vivisect @ git+https://github.com/Ana06/vivisect@py-3#egg=vivisect") + requirements.append("viv-utils==0.3.19") requirements.append("smda==1.5.13") else: # py2 From 2da2f498a2365a6afff4f4d8fb7380edff947ffb Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Tue, 9 Feb 2021 18:38:27 +0100 Subject: [PATCH 02/14] Add script to compare vivisect Python 2 vs 3 Compare the performance of vivisect Python 2 vs 3 by counting the number of features of each type extracted for every binary in `tests/data`. Render the ones that perform badly (under a threshold - 98) and the total performance. Render also the running time per binary for both Python 2 and 3. From this result, it seems that vivisect behaves properly with Python3.
--- scripts/vivisect-py2-vs-py3.sh | 69 ++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 scripts/vivisect-py2-vs-py3.sh diff --git a/scripts/vivisect-py2-vs-py3.sh b/scripts/vivisect-py2-vs-py3.sh new file mode 100755 index 00000000..20e7c817 --- /dev/null +++ b/scripts/vivisect-py2-vs-py3.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +int() { + int=$(bc <<< "scale=0; ($1 + 0.5)/1") +} + +export TIMEFORMAT='%3R' +threshold_time=90 +threshold_py3_time=60 # Do not warn if it doesn't take at least 1 minute to run +rm tests/data/*.viv 2>/dev/null +mkdir results +for file in tests/data/* +do + file=$(printf %q "$file") # Handle names with white spaces + file_name=$(basename $file) + echo $file_name + + rm "$file.viv" 2>/dev/null + py3_time=$(sh -c "time python3 scripts/show-features.py $file >> results/p3-$file_name.out 2>/dev/null" 2>&1) + rm "$file.viv" 2>/dev/null + py2_time=$(sh -c "time python2 scripts/show-features.py $file >> results/p2-$file_name.out 2>/dev/null" 2>&1) + + int $py3_time + if (($int > $threshold_py3_time)) + then + percentage=$(bc <<< "scale=3; $py2_time/$py3_time*100 + 0.5") + int $percentage + if (($int < $threshold_py3_time)) + then + echo -n " SLOWER ($percentage): " + fi + fi + echo " PY2($py2_time) PY3($py3_time)" +done + +threshold_features=98 +counter=0 +average=0 +results_for() { + py3=$(cat "results/p3-$file_name.out" | grep "$1" | wc -l) + py2=$(cat "results/p2-$file_name.out" | grep "$1" | wc -l) + if (($py2 > 0)) + then + percentage=$(bc <<< "scale=2; 100*$py3/$py2") + average=$(bc <<< "scale=2; $percentage + $average") + count=$(($count + 1)) + int $percentage + if (($int < $threshold_features)) + then + echo -e "$1: py2($py2) py3($py3) $percentage% - $file_name" + fi + fi +} + +rm tests/data/*.viv 2>/dev/null +echo -e '\nRESULTS:' +for file in tests/data/* +do + file_name=$(basename $file) + if test -f "results/p2-$file_name.out"; then + results_for 'insn' + results_for 'file' + results_for 'func' 
+ results_for 'bb' + fi +done + +average=$(bc <<< "scale=2; $average/$count") +echo "TOTAL: $average" From fc438866ec86f8779524a3cf6b7ff9c8f95b7c9c Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Tue, 9 Feb 2021 19:41:15 +0100 Subject: [PATCH 03/14] Add option to select the backend in Py3 Now we have two working backends in Python3! Add an option to select which one to use. With this code, vivisect is the default backend, but this is really easy to change. We could do some analysis to see if smda performances better than vivisect once the vivisect implementation. --- capa/main.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/capa/main.py b/capa/main.py index 2bb14ade..f52224d4 100644 --- a/capa/main.py +++ b/capa/main.py @@ -303,8 +303,8 @@ class UnsupportedRuntimeError(RuntimeError): pass -def get_extractor_py3(path, format, disable_progress=False): - if False: +def get_extractor_py3(path, format, backend, disable_progress=False): + if backend == "smda": from smda.SmdaConfig import SmdaConfig from smda.Disassembler import Disassembler @@ -333,13 +333,13 @@ def get_extractor_py3(path, format, disable_progress=False): return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path) -def get_extractor(path, format, disable_progress=False): +def get_extractor(path, format, backend="vivisect", disable_progress=False): """ raises: UnsupportedFormatError: """ if sys.version_info >= (3, 0): - return get_extractor_py3(path, format, disable_progress=disable_progress) + return get_extractor_py3(path, format, backend, disable_progress=disable_progress) else: return get_extractor_py2(path, format, disable_progress=disable_progress) @@ -515,6 +515,14 @@ def main(argv=None): parser.add_argument( "-f", "--format", choices=[f[0] for f in formats], default="auto", help="select sample format, %s" % format_help ) + parser.add_argument( + "-b", + "--backend", + type=str, + help="select the backend to use in Python 3 (this 
option is ignored in Python 2)", + choices=("vivisect", "smda"), + default="vivisect", + ) parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values") parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text") parser.add_argument( @@ -619,7 +627,7 @@ def main(argv=None): else: format = args.format try: - extractor = get_extractor(args.sample, args.format, disable_progress=args.quiet) + extractor = get_extractor(args.sample, args.format, args.backend, disable_progress=args.quiet) except UnsupportedFormatError: logger.error("-" * 80) logger.error(" Input file does not appear to be a PE file.") From bbb7878e0aa99c338f6889cf75c42682c17814b5 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Tue, 9 Feb 2021 20:04:30 +0100 Subject: [PATCH 04/14] Enable tests for vivisect in Python3 Now we support vivisect as backend in Python3. We should test it. --- tests/fixtures.py | 6 +----- tests/test_viv_features.py | 6 ++---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 4261408b..5ef9c3a4 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -520,11 +520,7 @@ def do_test_feature_count(get_extractor, sample, scope, feature, expected): def get_extractor(path): - if sys.version_info >= (3, 0): - extractor = get_smda_extractor(path) - else: - extractor = get_viv_extractor(path) - + extractor = get_viv_extractor(path) # overload the extractor so that the fixture exposes `extractor.path` setattr(extractor, "path", path) return extractor diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index 0922d758..3206e8cf 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -16,8 +16,7 @@ from fixtures import * indirect=["sample", "scope"], ) def test_viv_features(sample, scope, feature, expected): - with xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2"): - do_test_feature_presence(get_viv_extractor, 
sample, scope, feature, expected) + do_test_feature_presence(get_viv_extractor, sample, scope, feature, expected) @parametrize( @@ -26,5 +25,4 @@ def test_viv_features(sample, scope, feature, expected): indirect=["sample", "scope"], ) def test_viv_feature_counts(sample, scope, feature, expected): - with xfail(sys.version_info >= (3, 0), reason="vivsect only works on py2"): - do_test_feature_count(get_viv_extractor, sample, scope, feature, expected) + do_test_feature_count(get_viv_extractor, sample, scope, feature, expected) From 2859b037aaa9cc0e04fef79bc50a6747ec0e801e Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Mon, 15 Feb 2021 10:52:40 +0100 Subject: [PATCH 05/14] Use constants for backend option Use constants instead of string literals for the backend option. --- capa/main.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/capa/main.py b/capa/main.py index f52224d4..9cbbceb4 100644 --- a/capa/main.py +++ b/capa/main.py @@ -33,6 +33,8 @@ from capa.helpers import oint, get_file_taste RULES_PATH_DEFAULT_STRING = "(embedded rules)" SUPPORTED_FILE_MAGIC = set([b"MZ"]) +BACKEND_VIV = "vivisect" +BACKEND_SMDA = "smda" logger = logging.getLogger("capa") @@ -333,7 +335,7 @@ def get_extractor_py3(path, format, backend, disable_progress=False): return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path) -def get_extractor(path, format, backend="vivisect", disable_progress=False): +def get_extractor(path, format, backend=BACKEND_VIV, disable_progress=False): """ raises: UnsupportedFormatError: @@ -520,8 +522,8 @@ def main(argv=None): "--backend", type=str, help="select the backend to use in Python 3 (this option is ignored in Python 2)", - choices=("vivisect", "smda"), - default="vivisect", + choices=(BACKEND_VIV, BACKEND_SMDA), + default=BACKEND_VIV, ) parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values") parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of 
text") From 40ed2f39a45cec3465e72e476ab974ffc5d66adc Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Tue, 23 Feb 2021 17:38:06 +0100 Subject: [PATCH 06/14] Make backend a required parameter in get_extractor Make the `backend` argument required in the `get_extractor` internal routine. Specify a backend in the scripts which call this function. Add a CLI backend option in capa/features/freeze.py as well. --- capa/features/freeze.py | 10 +++++++++- capa/main.py | 2 +- scripts/bulk-process.py | 2 +- scripts/capa_as_library.py | 2 +- scripts/lint.py | 2 +- scripts/show-capabilities-by-function.py | 2 +- scripts/show-features.py | 2 +- 7 files changed, 15 insertions(+), 7 deletions(-) diff --git a/capa/features/freeze.py b/capa/features/freeze.py index 2e6ff8ee..3d57b76e 100644 --- a/capa/features/freeze.py +++ b/capa/features/freeze.py @@ -264,6 +264,14 @@ def main(argv=None): parser.add_argument( "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help ) + parser.add_argument( + "-b", + "--backend", + type=str, + help="select the backend to use in Python 3 (this option is ignored in Python 2)", + choices=(capa.main.BACKEND_VIV, capa.main.BACKEND_SMDA), + default=capa.main.BACKEND_VIV, + ) args = parser.parse_args(args=argv) if args.quiet: @@ -276,7 +284,7 @@ def main(argv=None): logging.basicConfig(level=logging.INFO) logging.getLogger().setLevel(logging.INFO) - extractor = capa.main.get_extractor(args.sample, args.format) + extractor = capa.main.get_extractor(args.sample, args.format, args.backend) with open(args.output, "wb") as f: f.write(dump(extractor)) diff --git a/capa/main.py b/capa/main.py index 9cbbceb4..f80319f1 100644 --- a/capa/main.py +++ b/capa/main.py @@ -335,7 +335,7 @@ def get_extractor_py3(path, format, backend, disable_progress=False): return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path) -def get_extractor(path, format, backend=BACKEND_VIV, 
disable_progress=False): +def get_extractor(path, format, backend, disable_progress=False): """ raises: UnsupportedFormatError: diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 75ebaab9..65f7c66f 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -95,7 +95,7 @@ def get_capa_results(args): rules, format, path = args logger.info("computing capa results for: %s", path) try: - extractor = capa.main.get_extractor(path, format, disable_progress=True) + extractor = capa.main.get_extractor(path, format, capa.main.BACKEND_VIV, disable_progress=True) except capa.main.UnsupportedFormatError: # i'm 100% sure if multiprocessing will reliably raise exceptions across process boundaries. # so instead, return an object with explicit success/failure status. diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index 9a3d9749..587e1437 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -191,7 +191,7 @@ def render_dictionary(doc): def capa_details(file_path, output_format="dictionary"): # extract features and find capabilities - extractor = capa.main.get_extractor(file_path, "auto", disable_progress=True) + extractor = capa.main.get_extractor(file_path, "auto", capa.main.BACKEND_VIV, disable_progress=True) capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True) # collect metadata (used only to make rendering more complete) diff --git a/scripts/lint.py b/scripts/lint.py index 20aae6e0..1e177df9 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -201,7 +201,7 @@ class DoesntMatchExample(Lint): continue try: - extractor = capa.main.get_extractor(path, "auto", disable_progress=True) + extractor = capa.main.get_extractor(path, "auto", capa.main.BACKEND_VIV, disable_progress=True) capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True) except Exception as e: logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e) 
diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 73ab3da3..a8feb35e 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -199,7 +199,7 @@ def main(argv=None): else: format = args.format try: - extractor = capa.main.get_extractor(args.sample, args.format) + extractor = capa.main.get_extractor(args.sample, args.format, capa.main.BACKEND_VIV) except capa.main.UnsupportedFormatError: logger.error("-" * 80) logger.error(" Input file does not appear to be a PE file.") diff --git a/scripts/show-features.py b/scripts/show-features.py index 60668686..c8f74de9 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -125,7 +125,7 @@ def main(argv=None): extractor = capa.features.freeze.load(f.read()) else: try: - extractor = capa.main.get_extractor(args.sample, args.format) + extractor = capa.main.get_extractor(args.sample, args.format, capa.main.BACKEND_VIV) except capa.main.UnsupportedFormatError: logger.error("-" * 80) logger.error(" Input file does not appear to be a PE file.") From a4a0a564481074a86c4406be927ea67036ae2cff Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Wed, 24 Feb 2021 17:24:24 +0100 Subject: [PATCH 07/14] Vivisect 1.0.0 released Vivisect 1.0.0 (Python 3) has been released, so we do not need to link to my GitHub branch anymore. 
https://pypi.org/project/vivisect --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c739f1b7..b7c90b3d 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ if sys.version_info >= (3, 0): # py3 requirements.append("halo") requirements.append("networkx") - requirements.append("vivisect @ git+https://github.com/Ana06/vivisect@py-3#egg=vivisect") + requirements.append("vivisect==1.0.0") requirements.append("viv-utils==0.3.19") requirements.append("smda==1.5.13") else: From 18eaea95fa4b134cce2513c0c0ae172208055524 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Thu, 25 Feb 2021 10:15:49 +0100 Subject: [PATCH 08/14] Fix TypeError exception in Python3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `va` can be None and this causes Python 3 to raise a TypeError exception. This is caused by the following breaking change in Python3: > The ordering comparison operators (<, <=, >=, >) raise a TypeError > exception when the operands don’t have a meaningful natural ordering. This didn't fail in the previously tried vivisect version (master from one week ago and not the release). This may have been caused by a bug in vivisect that has been fixed.
--- capa/features/extractors/viv/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 7b7db57c..96ed1396 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -488,7 +488,7 @@ def extract_insn_segment_access_features(f, bb, insn): def get_section(vw, va): for start, length, _, __ in vw.getMemoryMaps(): - if start <= va < start + length: + if va and start <= va < start + length: return start raise KeyError(va) From e5048fd3ac56dcddfac2eac07b1c3614db031ba6 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Thu, 25 Feb 2021 10:50:25 +0100 Subject: [PATCH 09/14] Add missing va parameter to SegmentationViolation The `envi.SegmentationViolation()` was missing the `va` required parameter. This has started failing now, because calling `vw.getSegment(0x4BA190)` for the `tests/data/mimikatz.exe_` produces different results in Python 2 and Python 3. It returns `None` in Python 3 while the output in Python 2 is: `(4939776, 16840, '.data', 'mimikatz')` I have reported the issue to vivisect: https://github.com/vivisect/vivisect/issues/370 --- capa/features/extractors/viv/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 96ed1396..511d6fca 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -239,7 +239,7 @@ def read_bytes(vw, va): """ segm = vw.getSegment(va) if not segm: - raise envi.SegmentationViolation() + raise envi.SegmentationViolation(va) segm_end = segm[0] + segm[1] try: From 079a9b5204bc74f3e57382be16e7203c9c52c0f7 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Thu, 25 Feb 2021 20:55:57 +0100 Subject: [PATCH 10/14] Remove backend option from Python 2 Do only provide the backend option in Python 3, as there is only one backend in Python 2. 
This way we keep the help text simpler. --- capa/features/freeze.py | 20 +++++++++++--------- capa/main.py | 20 +++++++++++--------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/capa/features/freeze.py b/capa/features/freeze.py index 3d57b76e..af6a77f3 100644 --- a/capa/features/freeze.py +++ b/capa/features/freeze.py @@ -264,14 +264,15 @@ def main(argv=None): parser.add_argument( "-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help ) - parser.add_argument( - "-b", - "--backend", - type=str, - help="select the backend to use in Python 3 (this option is ignored in Python 2)", - choices=(capa.main.BACKEND_VIV, capa.main.BACKEND_SMDA), - default=capa.main.BACKEND_VIV, - ) + if sys.version_info >= (3, 0): + parser.add_argument( + "-b", + "--backend", + type=str, + help="select the backend to use", + choices=(capa.main.BACKEND_VIV, capa.main.BACKEND_SMDA), + default=capa.main.BACKEND_VIV, + ) args = parser.parse_args(args=argv) if args.quiet: @@ -284,7 +285,8 @@ def main(argv=None): logging.basicConfig(level=logging.INFO) logging.getLogger().setLevel(logging.INFO) - extractor = capa.main.get_extractor(args.sample, args.format, args.backend) + backend = args.backend if sys.version_info > (3, 0) else None + extractor = capa.main.get_extractor(args.sample, args.format, backend) with open(args.output, "wb") as f: f.write(dump(extractor)) diff --git a/capa/main.py b/capa/main.py index f80319f1..25cc3462 100644 --- a/capa/main.py +++ b/capa/main.py @@ -517,14 +517,15 @@ def main(argv=None): parser.add_argument( "-f", "--format", choices=[f[0] for f in formats], default="auto", help="select sample format, %s" % format_help ) - parser.add_argument( - "-b", - "--backend", - type=str, - help="select the backend to use in Python 3 (this option is ignored in Python 2)", - choices=(BACKEND_VIV, BACKEND_SMDA), - default=BACKEND_VIV, - ) + if sys.version_info >= (3, 0): + parser.add_argument( + "-b", + 
"--backend", + type=str, + help="select the backend to use", + choices=(BACKEND_VIV, BACKEND_SMDA), + default=BACKEND_VIV, + ) parser.add_argument("-t", "--tag", type=str, help="filter on rule meta field values") parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text") parser.add_argument( @@ -629,7 +630,8 @@ def main(argv=None): else: format = args.format try: - extractor = get_extractor(args.sample, args.format, args.backend, disable_progress=args.quiet) + backend = args.backend if sys.version_info > (3, 0) else None + extractor = get_extractor(args.sample, args.format, backend, disable_progress=args.quiet) except UnsupportedFormatError: logger.error("-" * 80) logger.error(" Input file does not appear to be a PE file.") From 42af7b2d8bda27aed1aa8a496097867694846c27 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Mon, 1 Mar 2021 08:17:42 +0100 Subject: [PATCH 11/14] Use default backend instead of None Set the `backend` variable to the default backend by default instead to `None`. The `backend` variable is needed in Python 2 as `args.backend` is only set in Python 3. Although the value of the backend variable is ignored in Python 2, so that the default value is not used. 
Co-authored-by: William Ballenthin --- capa/features/freeze.py | 2 +- capa/main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/freeze.py b/capa/features/freeze.py index af6a77f3..51f11e53 100644 --- a/capa/features/freeze.py +++ b/capa/features/freeze.py @@ -285,7 +285,7 @@ def main(argv=None): logging.basicConfig(level=logging.INFO) logging.getLogger().setLevel(logging.INFO) - backend = args.backend if sys.version_info > (3, 0) else None + backend = args.backend if sys.version_info > (3, 0) else capa.main.BACKEND_VIV extractor = capa.main.get_extractor(args.sample, args.format, backend) with open(args.output, "wb") as f: f.write(dump(extractor)) diff --git a/capa/main.py b/capa/main.py index 25cc3462..d76c6852 100644 --- a/capa/main.py +++ b/capa/main.py @@ -630,7 +630,7 @@ def main(argv=None): else: format = args.format try: - backend = args.backend if sys.version_info > (3, 0) else None + backend = args.backend if sys.version_info > (3, 0) else capa.main.BACKEND_VIV extractor = get_extractor(args.sample, args.format, backend, disable_progress=args.quiet) except UnsupportedFormatError: logger.error("-" * 80) From 695b5b50ab974e072f20cee1da8efc4b64cf1a39 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Tue, 2 Mar 2021 12:03:20 +0100 Subject: [PATCH 12/14] Remove va not None check Instead of checking if `va` is `None` in `get_section()`, we should avoid calling this function with `None`.
This has been fixed in the following PR, so this is no longer needed: https://github.com/fireeye/capa/pull/442 --- capa/features/extractors/viv/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 511d6fca..6321fcab 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -488,7 +488,7 @@ def extract_insn_segment_access_features(f, bb, insn): def get_section(vw, va): for start, length, _, __ in vw.getMemoryMaps(): - if va and start <= va < start + length: + if start <= va < start + length: return start raise KeyError(va) From 29b6772721511e52369933c60fdff1d0e54b7643 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Tue, 2 Mar 2021 12:27:46 +0100 Subject: [PATCH 13/14] Test backend option As `get_extractor` returns only vivisect now, `test_main` is not run for smda. Test that capa works with all backends. It doesn't test that the backend is actually called. --- tests/test_main.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_main.py b/tests/test_main.py index 783fc95c..9b9dc89f 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -365,3 +365,17 @@ def test_not_render_rules_also_matched(z9324d_extractor, capsys): assert "act as TCP client" in std.out assert "connect TCP socket" in std.out assert "create TCP socket" in std.out + + +# It tests main works with different backends. It doesn't test that the backend +# is actually called.
+def test_backend_option(capsys): + if sys.version_info > (3, 0): + path = get_data_path_by_name("pma16-01") + assert capa.main.main([path, "-b", capa.main.BACKEND_VIV]) == 0 + std = capsys.readouterr() + assert "check for PEB NtGlobalFlag flag (24 matches)" in std.out + + assert capa.main.main([path, "-b", capa.main.BACKEND_SMDA]) == 0 + std = capsys.readouterr() + assert "check for PEB NtGlobalFlag flag (24 matches)" in std.out From c522f5094a7e5957e1cadfc7c420227289908780 Mon Sep 17 00:00:00 2001 From: Ana Maria Martinez Gomez Date: Tue, 2 Mar 2021 19:46:21 +0100 Subject: [PATCH 14/14] Use -j option in test_backend_option Use `-j` option in `test_backend_option` to check the extractor and that rules have been extracted. This way we don't need to check if a concrete rule matches, but only that at least a rule matches. --- tests/test_main.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 9b9dc89f..6732de2d 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -7,6 +7,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import sys +import json import textwrap import pytest @@ -367,15 +368,18 @@ def test_not_render_rules_also_matched(z9324d_extractor, capsys): assert "create TCP socket" in std.out -# It tests main works with different backends. It doesn't test that the backend -# is actually called. 
+# It tests main works with different backends def test_backend_option(capsys): if sys.version_info > (3, 0): path = get_data_path_by_name("pma16-01") - assert capa.main.main([path, "-b", capa.main.BACKEND_VIV]) == 0 + assert capa.main.main([path, "-j", "-b", capa.main.BACKEND_VIV]) == 0 std = capsys.readouterr() - assert "check for PEB NtGlobalFlag flag (24 matches)" in std.out + std_json = json.loads(std.out) + assert std_json["meta"]["analysis"]["extractor"] == "VivisectFeatureExtractor" + assert len(std_json["rules"]) > 0 - assert capa.main.main([path, "-b", capa.main.BACKEND_SMDA]) == 0 + assert capa.main.main([path, "-j", "-b", capa.main.BACKEND_SMDA]) == 0 std = capsys.readouterr() - assert "check for PEB NtGlobalFlag flag (24 matches)" in std.out + std_json = json.loads(std.out) + assert std_json["meta"]["analysis"]["extractor"] == "SmdaFeatureExtractor" + assert len(std_json["rules"]) > 0