From 84052c3ac566d7c33f9d61f0f60083c872b8346e Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Mon, 27 Mar 2023 19:21:55 +0530 Subject: [PATCH 01/14] init --- .github/workflows/tests.yml | 2 ++ setup.py | 1 + 2 files changed, 3 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2cb190b3..a0f12b49 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -33,6 +33,8 @@ jobs: python-version: "3.8" - name: Install dependencies run: pip install -e .[dev] + - name: Lint with ruff + run: ruff check . - name: Lint with isort run: isort --profile black --length-sort --line-width 120 --skip-glob "*_pb2.py" -c . - name: Lint with black diff --git a/setup.py b/setup.py index 642b0e4d..3eca0124 100644 --- a/setup.py +++ b/setup.py @@ -74,6 +74,7 @@ setuptools.setup( "pytest-instafail==0.4.2", "pytest-cov==4.0.0", "pycodestyle==2.10.0", + "ruff==0.0.259", "black==23.1.0", "isort==5.11.4", "mypy==1.1.1", From ed64986af866e8c8fe43ed6ae963f9602d899d84 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Thu, 30 Mar 2023 14:22:11 +0530 Subject: [PATCH 02/14] adds a ruff.toml file for config --- .github/ruff.toml | 5 +++++ .github/workflows/tests.yml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 .github/ruff.toml diff --git a/.github/ruff.toml b/.github/ruff.toml new file mode 100644 index 00000000..7d208862 --- /dev/null +++ b/.github/ruff.toml @@ -0,0 +1,5 @@ +select = ["E"] +ignore = [] + +# Same as pycodestyle. +line-length = 180 \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a0f12b49..b5fdb32f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies run: pip install -e .[dev] - name: Lint with ruff - run: ruff check . + run: ruff --config .github/ruff.toml --exclude "*_pb2.pyi" check . - name: Lint with isort run: isort --profile black --length-sort --line-width 120 --skip-glob "*_pb2.py" -c . - name: Lint with black From c04774b4b1c68a25bebb0ee38ce16a76f842ca61 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Tue, 4 Apr 2023 18:27:30 +0530 Subject: [PATCH 03/14] solving unresolvable issues using --fix and ignoring some issues --- .github/ruff.toml | 5 +++-- .github/workflows/tests.yml | 2 +- capa/ida/plugin/form.py | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index 7d208862..ddf7fe94 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -1,5 +1,6 @@ select = ["E"] -ignore = [] +ignore = ["E402", "E722", "E902"] +exclude = ["*_pb2.py", "*_pb2.pyi", "capa2yara.py"] # Same as pycodestyle. -line-length = 180 \ No newline at end of file +line-length = 180 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b5fdb32f..6d270aaa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies run: pip install -e .[dev] - name: Lint with ruff - run: ruff --config .github/ruff.toml --exclude "*_pb2.pyi" check . + run: ruff --config .github/ruff.toml check . - name: Lint with isort run: isort --profile black --length-sort --line-width 120 --skip-glob "*_pb2.py" -c . - name: Lint with black diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 6084277d..70a5f128 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -607,7 +607,8 @@ class CapaExplorerForm(idaapi.PluginForm): except UserCancelledError as e: capa.ida.helpers.inform_user_ida_ui("Analysis requires capa rules") logger.warning( - f"You must specify a directory containing capa rules before running analysis. Download and extract the official rules from {CAPA_OFFICIAL_RULESET_URL} (recommended)." + f"You must specify a directory containing capa rules before running analysis. " + f"Download and extract the official rules from {CAPA_OFFICIAL_RULESET_URL} (recommended)." ) return False except Exception as e: @@ -705,7 +706,8 @@ class CapaExplorerForm(idaapi.PluginForm): capa.ida.helpers.inform_user_ida_ui("Cached results were generated using different capas rules") logger.warning( - "capa is showing you cached results from a previous analysis run. Your rules have changed since and you should reanalyze the program to see new results." + "capa is showing you cached results from a previous analysis run. " + "Your rules have changed since and you should reanalyze the program to see new results." ) view_status_rules = "no rules matched for cache" From a260b35c9db51e8345066cb44a6abf5331f2ea50 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Tue, 4 Apr 2023 18:28:43 +0530 Subject: [PATCH 04/14] --fix --- capa/features/extractors/binja/insn.py | 2 +- capa/features/extractors/ida/insn.py | 2 +- capa/ida/plugin/form.py | 2 +- capa/main.py | 3 +- tests/test_engine.py | 115 +++++++++------------- tests/test_function_id.py | 6 +- tests/test_rules.py | 129 ++++++++++++------------- 7 files changed, 119 insertions(+), 140 deletions(-) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 23de37ce..99d5d6ef 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -439,7 +439,7 @@ def extract_insn_peb_access_characteristic_features( return True value = right.value.value - if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)): + if (reg, value) not in (("fsbase", 48), ("gsbase", 96)): return True results.append((Characteristic("peb access"), ih.address)) diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index ac8c8956..7acae816 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -73,7 +73,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) """ insn: idaapi.insn_t = ih.inner - if not insn.get_canon_mnem() in ("call", "jmp"): + if insn.get_canon_mnem() not in ("call", "jmp"): return # check calls to imported functions diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 70a5f128..75e04b9e 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -536,7 +536,7 @@ class CapaExplorerForm(idaapi.PluginForm): @param new_ea: destination ea @param old_ea: source ea """ - if not self.view_tabs.currentIndex() in (0, 1): + if self.view_tabs.currentIndex() not in (0, 1): return if idaapi.get_widget_type(widget) != idaapi.BWN_DISASM: diff --git a/capa/main.py b/capa/main.py index f5f1178a..172f6c70 100644 --- a/capa/main.py +++ b/capa/main.py @@ -254,7 +254,8 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro if disable_progress: # do not use tqdm to avoid unnecessary side effects when caller intends # to disable progress completely - pbar = lambda s, *args, **kwargs: s + def pbar(s, *args, **kwargs): + return s functions = list(extractor.get_functions()) n_funcs = len(functions) diff --git a/tests/test_engine.py b/tests/test_engine.py index 09560257..8806a59b 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -17,112 +17,93 @@ ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) def test_number(): - assert Number(1).evaluate({Number(0): {ADDR1}}) == False - assert Number(1).evaluate({Number(1): {ADDR1}}) == True - assert Number(1).evaluate({Number(2): {ADDR1, ADDR2}}) == False + assert Number(1).evaluate({Number(0): {ADDR1}}) is False + assert Number(1).evaluate({Number(1): {ADDR1}}) is True + assert Number(1).evaluate({Number(2): {ADDR1, ADDR2}}) is False def test_and(): - assert And([Number(1)]).evaluate({Number(0): {ADDR1}}) == False - assert And([Number(1)]).evaluate({Number(1): {ADDR1}}) == True - assert And([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}}) == False - assert And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) == False - assert And([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) == False - assert And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}}) == True + assert And([Number(1)]).evaluate({Number(0): {ADDR1}}) is False + assert And([Number(1)]).evaluate({Number(1): {ADDR1}}) is True + assert And([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}}) is False + assert And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) is False + assert And([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) is False + assert And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}}) is True def test_or(): - assert Or([Number(1)]).evaluate({Number(0): {ADDR1}}) == False - assert Or([Number(1)]).evaluate({Number(1): {ADDR1}}) == True - assert Or([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}}) == False - assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) == True - assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) == True - assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}}) == True + assert Or([Number(1)]).evaluate({Number(0): {ADDR1}}) is False + assert Or([Number(1)]).evaluate({Number(1): {ADDR1}}) is True + assert Or([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}}) is False + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) is True + assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) is True + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}}) is True def test_not(): - assert Not(Number(1)).evaluate({Number(0): {ADDR1}}) == True - assert Not(Number(1)).evaluate({Number(1): {ADDR1}}) == False + assert Not(Number(1)).evaluate({Number(0): {ADDR1}}) is True + assert Not(Number(1)).evaluate({Number(1): {ADDR1}}) is False def test_some(): - assert Some(0, [Number(1)]).evaluate({Number(0): {ADDR1}}) == True - assert Some(1, [Number(1)]).evaluate({Number(0): {ADDR1}}) == False + assert Some(0, [Number(1)]).evaluate({Number(0): {ADDR1}}) is True + assert Some(1, [Number(1)]).evaluate({Number(0): {ADDR1}}) is False - assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}}) == False - assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) == False + assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}}) is False + assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) is False assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate( - {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}} - ) - == True + Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}) is True ) assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate( - {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}} - ) - == True + Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}}) is True ) assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate( - { - Number(0): {ADDR1}, - Number(1): {ADDR1}, - Number(2): {ADDR1}, - Number(3): {ADDR1}, - Number(4): {ADDR1}, - } - ) - == True + Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}, Number(4): {ADDR1}}) is True ) def test_complex(): - assert True == Or( - [And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])] - ).evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) + assert True is Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])]).evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) - assert False == Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate( - {Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}} - ) + assert False is Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) def test_range(): # unbounded range, but no matching feature # since the lower bound is zero, and there are zero matches, ok - assert Range(Number(1)).evaluate({Number(2): {}}) == True + assert Range(Number(1)).evaluate({Number(2): {}}) is True # unbounded range with matching feature should always match - assert Range(Number(1)).evaluate({Number(1): {}}) == True - assert Range(Number(1)).evaluate({Number(1): {ADDR1}}) == True + assert Range(Number(1)).evaluate({Number(1): {}}) is True + assert Range(Number(1)).evaluate({Number(1): {ADDR1}}) is True # unbounded max - assert Range(Number(1), min=1).evaluate({Number(1): {ADDR1}}) == True - assert Range(Number(1), min=2).evaluate({Number(1): {ADDR1}}) == False - assert Range(Number(1), min=2).evaluate({Number(1): {ADDR1, ADDR2}}) == True + assert Range(Number(1), min=1).evaluate({Number(1): {ADDR1}}) is True + assert Range(Number(1), min=2).evaluate({Number(1): {ADDR1}}) is False + assert Range(Number(1), min=2).evaluate({Number(1): {ADDR1, ADDR2}}) is True # unbounded min - assert Range(Number(1), max=0).evaluate({Number(1): {ADDR1}}) == False - assert Range(Number(1), max=1).evaluate({Number(1): {ADDR1}}) == True - assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1}}) == True - assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2}}) == True - assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}}) == False + assert Range(Number(1), max=0).evaluate({Number(1): {ADDR1}}) is False + assert Range(Number(1), max=1).evaluate({Number(1): {ADDR1}}) is True + assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1}}) is True + assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2}}) is True + assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}}) is False # we can do an exact match by setting min==max - assert Range(Number(1), min=1, max=1).evaluate({Number(1): {}}) == False - assert Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1}}) == True - assert Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1, ADDR2}}) == False + assert Range(Number(1), min=1, max=1).evaluate({Number(1): {}}) is False + assert Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1}}) is True + assert Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1, ADDR2}}) is False # bounded range - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {}}) == False - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1}}) == True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2}}) == True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}}) == True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3, ADDR4}}) == False + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {}}) is False + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1}}) is True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2}}) is True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}}) is True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3, ADDR4}}) is False def test_short_circuit(): - assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) == True + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) is True # with short circuiting, only the children up until the first satisfied child are captured. assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}, short_circuit=True).children) == 1 @@ -131,8 +112,8 @@ def test_short_circuit(): def test_eval_order(): # base cases. - assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) == True - assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) == True + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) is True + assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) is True # with short circuiting, only the children up until the first satisfied child are captured. assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}).children) == 1 diff --git a/tests/test_function_id.py b/tests/test_function_id.py index c6ad2be9..f256db32 100644 --- a/tests/test_function_id.py +++ b/tests/test_function_id.py @@ -4,20 +4,20 @@ import capa.features.insn def test_function_id_simple_match(pma16_01_extractor): - assert pma16_01_extractor.is_library_function(0x407490) == True + assert pma16_01_extractor.is_library_function(4224144) is True assert pma16_01_extractor.get_function_name(0x407490) == "__aulldiv" def test_function_id_gz_pat(pma16_01_extractor): # aullrem is stored in `test_aullrem.pat.gz` - assert pma16_01_extractor.is_library_function(0x407500) == True + assert pma16_01_extractor.is_library_function(4224256) is True assert pma16_01_extractor.get_function_name(0x407500) == "__aullrem" def test_function_id_complex_match(pma16_01_extractor): # 0x405714 is __spawnlp which requires recursive match of __spawnvp at 0x407FAB # (and __spawnvpe at 0x409DE8) - assert pma16_01_extractor.is_library_function(0x405714) == True + assert pma16_01_extractor.is_library_function(4216596) is True assert pma16_01_extractor.get_function_name(0x405714) == "__spawnlp" diff --git a/tests/test_rules.py b/tests/test_rules.py index 29db2a2f..132f478f 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -40,8 +40,8 @@ ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) def test_rule_ctor(): r = capa.rules.Rule("test rule", capa.rules.FUNCTION_SCOPE, Or([Number(1)]), {}) - assert r.evaluate({Number(0): {ADDR1}}) == False - assert r.evaluate({Number(1): {ADDR2}}) == True + assert r.evaluate({Number(0): {ADDR1}}) is False + assert r.evaluate({Number(1): {ADDR2}}) is True def test_rule_yaml(): @@ -63,10 +63,10 @@ def test_rule_yaml(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(0): {ADDR1}}) == False - assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) == False - assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}) == True - assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}}) == True + assert r.evaluate({Number(0): {ADDR1}}) is False + assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) is False + assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}) is True + assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}}) is True def test_rule_yaml_complex(): @@ -89,8 +89,8 @@ def test_rule_yaml_complex(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) == True - assert r.evaluate({Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) == False + assert r.evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) is True + assert r.evaluate({Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) is False def test_rule_descriptions(): @@ -167,8 +167,8 @@ def test_rule_yaml_not(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(1): {ADDR1}}) == True - assert r.evaluate({Number(1): {ADDR1}, Number(2): {ADDR1}}) == False + assert r.evaluate({Number(1): {ADDR1}}) is True + assert r.evaluate({Number(1): {ADDR1}, Number(2): {ADDR1}}) is False def test_rule_yaml_count(): @@ -182,9 +182,9 @@ def test_rule_yaml_count(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(100): set()}) == False - assert r.evaluate({Number(100): {ADDR1}}) == True - assert r.evaluate({Number(100): {ADDR1, ADDR2}}) == False + assert r.evaluate({Number(100): set()}) is False + assert r.evaluate({Number(100): {ADDR1}}) is True + assert r.evaluate({Number(100): {ADDR1, ADDR2}}) is False def test_rule_yaml_count_range(): @@ -198,10 +198,10 @@ def test_rule_yaml_count_range(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(100): set()}) == False - assert r.evaluate({Number(100): {ADDR1}}) == True - assert r.evaluate({Number(100): {ADDR1, ADDR2}}) == True - assert r.evaluate({Number(100): {ADDR1, ADDR2, ADDR3}}) == False + assert r.evaluate({Number(100): set()}) is False + assert r.evaluate({Number(100): {ADDR1}}) is True + assert r.evaluate({Number(100): {ADDR1, ADDR2}}) is True + assert r.evaluate({Number(100): {ADDR1, ADDR2, ADDR3}}) is False def test_rule_yaml_count_string(): @@ -215,10 +215,10 @@ def test_rule_yaml_count_string(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({String("foo"): set()}) == False - assert r.evaluate({String("foo"): {ADDR1}}) == False - assert r.evaluate({String("foo"): {ADDR1, ADDR2}}) == True - assert r.evaluate({String("foo"): {ADDR1, ADDR2, ADDR3}}) == False + assert r.evaluate({String("foo"): set()}) is False + assert r.evaluate({String("foo"): {ADDR1}}) is False + assert r.evaluate({String("foo"): {ADDR1, ADDR2}}) is True + assert r.evaluate({String("foo"): {ADDR1, ADDR2, ADDR3}}) is False def test_invalid_rule_feature(): @@ -466,12 +466,12 @@ def test_number_symbol(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (Number(1) in children) == True - assert (Number(0xFFFFFFFF) in children) == True - assert (Number(2, description="symbol name") in children) == True - assert (Number(3, description="symbol name") in children) == True - assert (Number(4, description="symbol name = another name") in children) == True - assert (Number(0x100, description="symbol name") in children) == True + assert (Number(1) in children) is True + assert (Number(4294967295) in children) is True + assert (Number(2, description="symbol name") in children) is True + assert (Number(3, description="symbol name") in children) is True + assert (Number(4, description="symbol name = another name") in children) is True + assert (Number(256, description="symbol name") in children) is True def test_count_number_symbol(): @@ -488,11 +488,11 @@ def test_count_number_symbol(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(2): set()}) == False - assert r.evaluate({Number(2): {ADDR1}}) == True - assert r.evaluate({Number(2): {ADDR1, ADDR2}}) == False - assert r.evaluate({Number(0x100, description="symbol name"): {ADDR1}}) == False - assert r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) == True + assert r.evaluate({Number(2): set()}) is False + assert r.evaluate({Number(2): {ADDR1}}) is True + assert r.evaluate({Number(2): {ADDR1, ADDR2}}) is False + assert r.evaluate({Number(256, description="symbol name"): {ADDR1}}) is False + assert r.evaluate({Number(256, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) is True def test_invalid_number(): @@ -553,11 +553,11 @@ def test_offset_symbol(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (Offset(1) in children) == True - assert (Offset(2, description="symbol name") in children) == True - assert (Offset(3, description="symbol name") in children) == True - assert (Offset(4, description="symbol name = another name") in children) == True - assert (Offset(0x100, description="symbol name") in children) == True + assert (Offset(1) in children) is True + assert (Offset(2, description="symbol name") in children) is True + assert (Offset(3, description="symbol name") in children) is True + assert (Offset(4, description="symbol name = another name") in children) is True + assert (Offset(256, description="symbol name") in children) is True def test_count_offset_symbol(): @@ -574,11 +574,11 @@ def test_count_offset_symbol(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Offset(2): set()}) == False - assert r.evaluate({Offset(2): {ADDR1}}) == True - assert r.evaluate({Offset(2): {ADDR1, ADDR2}}) == False - assert r.evaluate({Offset(0x100, description="symbol name"): {ADDR1}}) == False - assert r.evaluate({Offset(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) == True + assert r.evaluate({Offset(2): set()}) is False + assert r.evaluate({Offset(2): {ADDR1}}) is True + assert r.evaluate({Offset(2): {ADDR1, ADDR2}}) is False + assert r.evaluate({Offset(256, description="symbol name"): {ADDR1}}) is False + assert r.evaluate({Offset(256, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) is True def test_invalid_offset(): @@ -664,8 +664,8 @@ def test_explicit_string_values_int(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (String("123") in children) == True - assert (String("0x123") in children) == True + assert (String("123") in children) is True + assert (String("0x123") in children) is True def test_string_values_special_characters(): @@ -683,8 +683,8 @@ def test_string_values_special_characters(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (String("hello\r\nworld") in children) == True - assert (String("bye\nbye") in children) == True + assert (String("hello\r\nworld") in children) is True + assert (String("bye\nbye") in children) is True def test_substring_feature(): @@ -702,9 +702,9 @@ def test_substring_feature(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (Substring("abc") in children) == True - assert (Substring("def") in children) == True - assert (Substring("gh\ni") in children) == True + assert (Substring("abc") in children) is True + assert (Substring("def") in children) is True + assert (Substring("gh\ni") in children) is True def test_substring_description(): @@ -721,7 +721,7 @@ def test_substring_description(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (Substring("abc") in children) == True + assert (Substring("abc") in children) is True def test_filter_rules(): @@ -902,9 +902,9 @@ def test_function_name_features(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (FunctionName("strcpy") in children) == True - assert (FunctionName("strcmp", description="copy from here to there") in children) == True - assert (FunctionName("strdup", description="duplicate a string") in children) == True + assert (FunctionName("strcpy") in children) is True + assert (FunctionName("strcmp", description="copy from here to there") in children) is True + assert (FunctionName("strdup", description="duplicate a string") in children) is True def test_os_features(): @@ -921,8 +921,8 @@ def test_os_features(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (OS(OS_WINDOWS) in children) == True - assert (OS(OS_LINUX) not in children) == True + assert (OS(OS_WINDOWS) in children) is True + assert (OS(OS_LINUX) not in children) is True def test_format_features(): @@ -939,8 +939,8 @@ def test_format_features(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (Format(FORMAT_PE) in children) == True - assert (Format(FORMAT_ELF) not in children) == True + assert (Format(FORMAT_PE) in children) is True + assert (Format(FORMAT_ELF) not in children) is True def test_arch_features(): @@ -957,8 +957,8 @@ def test_arch_features(): ) r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) - assert (Arch(ARCH_AMD64) in children) == True - assert (Arch(ARCH_I386) not in children) == True + assert (Arch(ARCH_AMD64) in children) is True + assert (Arch(ARCH_I386) not in children) is True def test_property_access(): @@ -973,10 +973,10 @@ def test_property_access(): """ ) ) - assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ): {ADDR1}}) == True + assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ): {ADDR1}}) is True - assert r.evaluate({Property("System.IO.FileInfo::Length"): {ADDR1}}) == False - assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.WRITE): {ADDR1}}) == False + assert r.evaluate({Property("System.IO.FileInfo::Length"): {ADDR1}}) is False + assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.WRITE): {ADDR1}}) is False def test_property_access_symbol(): @@ -992,8 +992,5 @@ def test_property_access_symbol(): ) ) assert ( - r.evaluate( - {Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {ADDR1}} - ) - == True + r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {ADDR1}}) is True ) From 89c6c235f7d895ff561068f79e2915996d721a82 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Tue, 4 Apr 2023 18:46:31 +0530 Subject: [PATCH 05/14] resolve conflict --- .github/ruff.toml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index ddf7fe94..d27e8915 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -1,6 +1,6 @@ select = ["E"] ignore = ["E402", "E722", "E902"] -exclude = ["*_pb2.py", "*_pb2.pyi", "capa2yara.py"] +exclude = ["*_pb2.py", "*_pb2.pyi", "capa2yara.py" , "test_engine.py"] # Same as pycodestyle. line-length = 180 diff --git a/setup.py b/setup.py index 3eca0124..dd680893 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,7 @@ setuptools.setup( "pytest-cov==4.0.0", "pycodestyle==2.10.0", "ruff==0.0.259", - "black==23.1.0", + "black==23.3.0", "isort==5.11.4", "mypy==1.1.1", "psutil==5.9.2", From a40126aeff776b94de44c3dbbb7c66d5950ce924 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Tue, 4 Apr 2023 19:10:40 +0530 Subject: [PATCH 06/14] reformatting with black --- .github/ruff.toml | 2 +- tests/test_engine.py | 23 ++++++++++++++++++----- tests/test_rules.py | 5 ++++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index d27e8915..ddf7fe94 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -1,6 +1,6 @@ select = ["E"] ignore = ["E402", "E722", "E902"] -exclude = ["*_pb2.py", "*_pb2.pyi", "capa2yara.py" , "test_engine.py"] +exclude = ["*_pb2.py", "*_pb2.pyi", "capa2yara.py"] # Same as pycodestyle. line-length = 180 diff --git a/tests/test_engine.py b/tests/test_engine.py index 8806a59b..43412764 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -52,20 +52,33 @@ def test_some(): assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}}) is False assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) is False assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}) is True + Some(2, [Number(1), Number(2), Number(3)]).evaluate( + {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}} + ) + is True ) assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}}) is True + Some(2, [Number(1), Number(2), Number(3)]).evaluate( + {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}} + ) + is True ) assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}, Number(4): {ADDR1}}) is True + Some(2, [Number(1), Number(2), Number(3)]).evaluate( + {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}, Number(4): {ADDR1}} + ) + is True ) def test_complex(): - assert True is Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])]).evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) + assert True is Or( + [And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])] + ).evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) - assert False is Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) + assert False is Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate( + {Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}} + ) def test_range(): diff --git a/tests/test_rules.py b/tests/test_rules.py index 132f478f..0a2e68d1 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -992,5 +992,8 @@ def test_property_access_symbol(): ) ) assert ( - r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {ADDR1}}) is True + r.evaluate( + {Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {ADDR1}} + ) + is True ) From 6641c8c9c909d27b51a52f89d17cf28cb70191d4 Mon Sep 17 00:00:00 2001 From: ooprathamm <89736193+ooprathamm@users.noreply.github.com> Date: Tue, 4 Apr 2023 23:07:04 +0530 Subject: [PATCH 07/14] fixing error issue Co-authored-by: Moritz --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a675ed58..37f9371a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies run: pip install -e .[dev] - name: Lint with ruff - run: ruff --config .github/ruff.toml check . + run: ruff check --config .github/ruff.toml . - name: Lint with isort run: isort --profile black --length-sort --line-width 120 --skip-glob "*_pb2.py" -c . - name: Lint with black From 6eaa46ea9ae5f45500528c96ab3d35ea77d1ff30 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Wed, 5 Apr 2023 13:32:15 +0530 Subject: [PATCH 08/14] revert bninja change --- .github/ruff.toml | 4 ++-- capa/features/extractors/binja/insn.py | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index ddf7fe94..38927883 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -1,6 +1,6 @@ select = ["E"] -ignore = ["E402", "E722", "E902"] -exclude = ["*_pb2.py", "*_pb2.pyi", "capa2yara.py"] +ignore = ["E402", "E722"] +exclude = ["*_pb2.py", "*_pb2.pyi"] # Same as pycodestyle. line-length = 180 diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 99d5d6ef..23de37ce 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -439,7 +439,7 @@ def extract_insn_peb_access_characteristic_features( return True value = right.value.value - if (reg, value) not in (("fsbase", 48), ("gsbase", 96)): + if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)): return True results.append((Characteristic("peb access"), ih.address)) diff --git a/setup.py b/setup.py index c7688027..5e42f516 100644 --- a/setup.py +++ b/setup.py @@ -74,7 +74,7 @@ setuptools.setup( "pytest-instafail==0.5.0", "pytest-cov==4.0.0", "pycodestyle==2.10.0", - "ruff==0.0.259", + "ruff==0.0.260", "black==23.3.0", "isort==5.11.4", "mypy==1.1.1", From eef1548baad5b586a6832461a7a9365c52a9e363 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Wed, 5 Apr 2023 16:28:00 +0530 Subject: [PATCH 09/14] fix capy2yara.py --- capa/features/extractors/binja/insn.py | 2 +- scripts/capa2yara.py | 26 +++++++++++++++++--------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 23de37ce..fe5cd228 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -439,7 +439,7 @@ def extract_insn_peb_access_characteristic_features( return True value = right.value.value - if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)): + if (reg, value) not in (("fsbase", 0x30), ("gsbase", 0x60)): return True results.append((Characteristic("peb access"), ih.address)) diff --git a/scripts/capa2yara.py b/scripts/capa2yara.py index 067a2539..d5830dab 100644 --- a/scripts/capa2yara.py +++ b/scripts/capa2yara.py @@ -59,7 +59,8 @@ unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"] # -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml # -- https://github.com/mandiant/capa-rules/blob/master/collection/browser/gather-firefox-profile-information.yml # - count(string (1 rule: /executable/subfile/pe/contain-an-embedded-pe-file.yml) -# - count(match( could be done by creating the referenced rule a 2nd time with the condition, that it hits x times (only 1 rule: ./anti-analysis/anti-disasm/contain-anti-disasm-techniques.yml) +# - count(match( could be done by creating the referenced rule a 2nd time with the condition, that it hits x times +# (only 1 rule: ./anti-analysis/anti-disasm/contain-anti-disasm-techniques.yml) # - it would be technically possible to get the "basic blocks" working, but the rules contain mostly other non supported statements in there => not worth the effort. # collect all converted rules to be able to check if we have needed sub rules for match: @@ -128,7 +129,8 @@ def convert_capa_number_to_yara_bytes(number): def convert_rule_name(rule_name): - # yara rule names: "Identifiers must follow the same lexical conventions of the C programming language, they can contain any alphanumeric character and the underscore character, but the first character cannot be a digit. Rule identifiers are case sensitive and cannot exceed 128 characters." so we replace any non-alphanum with _ + # yara rule names: "Identifiers must follow the same lexical conventions of the C programming language, they can contain any alphanumeric character and the underscore character + # but the first character cannot be a digit. Rule identifiers are case sensitive and cannot exceed 128 characters." so we replace any non-alphanum with _ rule_name = re.sub(r"\W", "_", rule_name) rule_name = "capa_" + rule_name @@ -197,7 +199,8 @@ def convert_rule(rule, rulename, cround, depth): # even looking for empty string in dll_regex doesn't work for some files (list below) with pe.imports so do just a string search # yara_condition += '\tpe.imports(/.{0,30}/i, /' + api + '/) ' - # 5fbbfeed28b258c42e0cfeb16718b31c, 2D3EDC218A90F03089CC01715A9F047F, 7EFF498DE13CC734262F87E6B3EF38AB, C91887D861D9BD4A5872249B641BC9F9, a70052c45e907820187c7e6bcdc7ecca, 0596C4EA5AA8DEF47F22C85D75AACA95 + # 5fbbfeed28b258c42e0cfeb16718b31c, 2D3EDC218A90F03089CC01715A9F047F, 7EFF498DE13CC734262F87E6B3EF38AB, + # C91887D861D9BD4A5872249B641BC9F9, a70052c45e907820187c7e6bcdc7ecca, 0596C4EA5AA8DEF47F22C85D75AACA95 var_name = "api_" + var_names.pop(0) # limit regex with word boundary \b but also search for appended A and W @@ -286,7 +289,8 @@ def convert_rule(rule, rulename, cround, depth): # all .* in the regexes of capa look like they should be maximum 100 chars so take 1000 to speed up rules and prevent yara warnings on poor performance regex = regex.replace(".*", ".{,1000}") - # strange: capa accepts regexes with unescaped / like - string: /com/exe4j/runtime/exe4jcontroller/i in capa-rules/compiler/exe4j/compiled-with-exe4j.yml, needs a fix for yara: + # strange: capa accepts regexes with unescaped / + # like - string: /com/exe4j/runtime/exe4jcontroller/i in capa-rules/compiler/exe4j/compiled-with-exe4j.yml, needs a fix for yara: # would assume that get_value_str() gives the raw string regex = re.sub(r"(? /reg(.exe)?/ regex = re.sub(r"\(\|([^\)]+)\)", r"(\1)?", regex) - # change beginning of line to null byte, e.g. /^open => /\x00open (not word boundary because we're not looking for the beginning of a word in a text but usually a function name if there's ^ in a capa rule) + # change beginning of line to null byte, e.g. /^open => /\x00open + # (not word boundary because we're not looking for the beginning of a word in a text but usually a function name if there's ^ in a capa rule) regex = re.sub(r"^\^", r"\\x00", regex) # regex = re.sub(r"^\^", r"\\b", regex) @@ -420,7 +425,8 @@ def convert_rule(rule, rulename, cround, depth): ) # remove last 'or' # yara_condition = re.sub(r'\sor $', ' ', yara_condition) - rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped => coverage is reduced compared to the original capa rule. " + rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped " + rule_comment += "=> coverage is reduced compared to the original capa rule. " x += 1 incomplete = 1 continue @@ -446,7 +452,8 @@ def convert_rule(rule, rulename, cround, depth): + str(depth) ) - rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped => coverage is reduced compared to the original capa rule. " + rule_comment += "This rule is incomplete because a branch inside an Or-statement had an unsupported feature and was skipped" + rule_comment += "=> coverage is reduced compared to the original capa rule. " x += 1 incomplete = 1 continue @@ -669,7 +676,8 @@ def convert_rules(rules, namespaces, cround, make_priv): yara += " condition:" + condition_header + yara_condition + "\n}" - # TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta (doing it for all of them using yara-ci upload at the moment) + # TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta + # (doing it for all of them using yara-ci upload at the moment) output_yar(yara) converted_rules.append(rule_name) count_incomplete += incomplete @@ -719,7 +727,7 @@ def main(argv=None): "// Rules from Mandiant's https://github.com/mandiant/capa-rules converted to YARA using https://github.com/mandiant/capa/blob/master/scripts/capa2yara.py by Arnim Rupp" ) output_yar( - "// Beware: These are less rules than capa (because not all fit into YARA, stats at EOF) and is less precise because e.g. capas function scopes are applied to the whole file" + "// Beware: These are less rules than capa (because not all fit into YARA, stats at EOF) and is less precise e.g. capas function scopes are applied to the whole file" ) output_yar( '// Beware: Some rules are incomplete because an optional branch was not supported by YARA. These rules are marked in a comment in meta: (search for "incomplete")' From 068ac0ca2ca085c9f71a82dbf16248da654774b1 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Wed, 5 Apr 2023 16:29:53 +0530 Subject: [PATCH 10/14] fix black --- scripts/capa2yara.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/capa2yara.py b/scripts/capa2yara.py index d5830dab..9752c996 100644 --- a/scripts/capa2yara.py +++ b/scripts/capa2yara.py @@ -59,7 +59,7 @@ unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"] # -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml # -- https://github.com/mandiant/capa-rules/blob/master/collection/browser/gather-firefox-profile-information.yml # - count(string (1 rule: /executable/subfile/pe/contain-an-embedded-pe-file.yml) -# - count(match( could be done by creating the referenced rule a 2nd time with the condition, that it hits x times +# - count(match( could be done by creating the referenced rule a 2nd time with the condition, that it hits x times # (only 1 rule: ./anti-analysis/anti-disasm/contain-anti-disasm-techniques.yml) # - it would be technically possible to get the "basic blocks" working, but the rules contain mostly other non supported statements in there => not worth the effort. @@ -289,7 +289,7 @@ def convert_rule(rule, rulename, cround, depth): # all .* in the regexes of capa look like they should be maximum 100 chars so take 1000 to speed up rules and prevent yara warnings on poor performance regex = regex.replace(".*", ".{,1000}") - # strange: capa accepts regexes with unescaped / + # strange: capa accepts regexes with unescaped / # like - string: /com/exe4j/runtime/exe4jcontroller/i in capa-rules/compiler/exe4j/compiled-with-exe4j.yml, needs a fix for yara: # would assume that get_value_str() gives the raw string regex = re.sub(r"(? /reg(.exe)?/ regex = re.sub(r"\(\|([^\)]+)\)", r"(\1)?", regex) - # change beginning of line to null byte, e.g. /^open => /\x00open + # change beginning of line to null byte, e.g. /^open => /\x00open # (not word boundary because we're not looking for the beginning of a word in a text but usually a function name if there's ^ in a capa rule) regex = re.sub(r"^\^", r"\\x00", regex) @@ -676,7 +676,7 @@ def convert_rules(rules, namespaces, cround, make_priv): yara += " condition:" + condition_header + yara_condition + "\n}" - # TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta + # TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta # (doing it for all of them using yara-ci upload at the moment) output_yar(yara) converted_rules.append(rule_name) From 3d1ef51863e46ea602b2b668c956abdb5a87b75c Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Wed, 5 Apr 2023 17:33:05 +0530 Subject: [PATCH 11/14] revert --- capa/ida/plugin/form.py | 8 ++++---- tests/test_function_id.py | 6 +++--- tests/test_rules.py | 14 +++++++------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 75e04b9e..30f41f9f 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -607,8 +607,8 @@ class CapaExplorerForm(idaapi.PluginForm): except UserCancelledError as e: capa.ida.helpers.inform_user_ida_ui("Analysis requires capa rules") logger.warning( - f"You must specify a directory containing capa rules before running analysis. " - f"Download and extract the official rules from {CAPA_OFFICIAL_RULESET_URL} (recommended)." + "You must specify a directory containing capa rules before running analysis.%s", + f"Download and extract the official rules from {CAPA_OFFICIAL_RULESET_URL} (recommended).", ) return False except Exception as e: @@ -706,8 +706,8 @@ class CapaExplorerForm(idaapi.PluginForm): capa.ida.helpers.inform_user_ida_ui("Cached results were generated using different capas rules") logger.warning( - "capa is showing you cached results from a previous analysis run. " - "Your rules have changed since and you should reanalyze the program to see new results." + "capa is showing you cached results from a previous analysis run.%s ", + "Your rules have changed since and you should reanalyze the program to see new results.", ) view_status_rules = "no rules matched for cache" diff --git a/tests/test_function_id.py b/tests/test_function_id.py index f256db32..65af1ae0 100644 --- a/tests/test_function_id.py +++ b/tests/test_function_id.py @@ -4,20 +4,20 @@ import capa.features.insn def test_function_id_simple_match(pma16_01_extractor): - assert pma16_01_extractor.is_library_function(4224144) is True + assert pma16_01_extractor.is_library_function(0x407490) is True assert pma16_01_extractor.get_function_name(0x407490) == "__aulldiv" def test_function_id_gz_pat(pma16_01_extractor): # aullrem is stored in `test_aullrem.pat.gz` - assert pma16_01_extractor.is_library_function(4224256) is True + assert pma16_01_extractor.is_library_function(0x407500) is True assert pma16_01_extractor.get_function_name(0x407500) == "__aullrem" def test_function_id_complex_match(pma16_01_extractor): # 0x405714 is __spawnlp which requires recursive match of __spawnvp at 0x407FAB # (and __spawnvpe at 0x409DE8) - assert pma16_01_extractor.is_library_function(4216596) is True + assert pma16_01_extractor.is_library_function(0x405714) is True assert pma16_01_extractor.get_function_name(0x405714) == "__spawnlp" diff --git a/tests/test_rules.py b/tests/test_rules.py index 0a2e68d1..b5eab288 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -467,11 +467,11 @@ def test_number_symbol(): r = capa.rules.Rule.from_yaml(rule) children = list(r.statement.get_children()) assert (Number(1) in children) is True - assert (Number(4294967295) in children) is True + assert (Number(0xFFFFFFFF) in children) is True assert (Number(2, description="symbol name") in children) is True assert (Number(3, description="symbol name") in children) is True assert (Number(4, description="symbol name = another name") in children) is True - assert (Number(256, description="symbol name") in children) is True + assert (Number(0x100, description="symbol name") in children) is True def test_count_number_symbol(): @@ -491,8 +491,8 @@ def test_count_number_symbol(): assert r.evaluate({Number(2): set()}) is False assert r.evaluate({Number(2): {ADDR1}}) is True assert r.evaluate({Number(2): {ADDR1, ADDR2}}) is False - assert r.evaluate({Number(256, description="symbol name"): {ADDR1}}) is False - assert r.evaluate({Number(256, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) is True + assert r.evaluate({Number(0x100, description="symbol name"): {ADDR1}}) is False + assert r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) is True def test_invalid_number(): @@ -557,7 +557,7 @@ def test_offset_symbol(): assert (Offset(2, description="symbol name") in children) is True assert (Offset(3, description="symbol name") in children) is True assert (Offset(4, description="symbol name = another name") in children) is True - assert (Offset(256, description="symbol name") in children) is True + assert (Offset(0x100, description="symbol name") in children) is True def test_count_offset_symbol(): @@ -577,8 +577,8 @@ def test_count_offset_symbol(): assert r.evaluate({Offset(2): set()}) is False assert r.evaluate({Offset(2): {ADDR1}}) is True assert r.evaluate({Offset(2): {ADDR1, ADDR2}}) is False - assert r.evaluate({Offset(256, description="symbol name"): {ADDR1}}) is False - assert r.evaluate({Offset(256, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) is True + assert r.evaluate({Offset(0x100, description="symbol name"): {ADDR1}}) is False + assert r.evaluate({Offset(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) is True def test_invalid_offset(): From e6248cd9edb9fef195db77a23586d9146500e486 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Wed, 5 Apr 2023 17:43:11 +0530 Subject: [PATCH 12/14] solve failing binja --- capa/features/extractors/binja/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index fe5cd228..81243575 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -439,7 +439,7 @@ def extract_insn_peb_access_characteristic_features( return True value = right.value.value - if (reg, value) not in (("fsbase", 0x30), ("gsbase", 0x60)): + if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)): # noqa: E713 return True results.append((Characteristic("peb access"), ih.address)) From eedd88568347d06c8240a7dac6fae74d26216a34 Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Wed, 5 Apr 2023 17:44:57 +0530 Subject: [PATCH 13/14] fix black --- capa/features/extractors/binja/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 81243575..95b0defe 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -439,7 +439,7 @@ def extract_insn_peb_access_characteristic_features( return True value = right.value.value - if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)): # noqa: E713 + if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)): # noqa: E713 return True results.append((Characteristic("peb access"), ih.address)) From efb07fafb3e2449303e3cbd1a4b5f928ccf67c9b Mon Sep 17 00:00:00 2001 From: Pratham Chauhan Date: Wed, 5 Apr 2023 22:16:00 +0530 Subject: [PATCH 14/14] fix --- .github/ruff.toml | 4 + capa/features/extractors/binja/insn.py | 2 +- tests/test_engine.py | 118 ++++++++++++++----------- tests/test_rules.py | 78 ++++++++-------- 4 files changed, 111 insertions(+), 91 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index 38927883..2740b0e0 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -1,4 +1,8 @@ +# Enable pycodestyle (`E`) codes select = ["E"] + +# E402 module level import not at top of file +# E722 do not use bare 'except' ignore = ["E402", "E722"] exclude = ["*_pb2.py", "*_pb2.pyi"] diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 95b0defe..fe5cd228 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -439,7 +439,7 @@ def extract_insn_peb_access_characteristic_features( return True value = right.value.value - if not (reg, value) in (("fsbase", 0x30), ("gsbase", 0x60)): # noqa: E713 + if (reg, value) not in (("fsbase", 0x30), ("gsbase", 0x60)): return True results.append((Characteristic("peb access"), ih.address)) diff --git a/tests/test_engine.py b/tests/test_engine.py index 43412764..c070edb8 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -17,106 +17,116 @@ ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) def test_number(): - assert Number(1).evaluate({Number(0): {ADDR1}}) is False - assert Number(1).evaluate({Number(1): {ADDR1}}) is True - assert Number(1).evaluate({Number(2): {ADDR1, ADDR2}}) is False + assert bool(Number(1).evaluate({Number(0): {ADDR1}})) is False + assert bool(Number(1).evaluate({Number(1): {ADDR1}})) is True + assert bool(Number(1).evaluate({Number(2): {ADDR1, ADDR2}})) is False def test_and(): - assert And([Number(1)]).evaluate({Number(0): {ADDR1}}) is False - assert And([Number(1)]).evaluate({Number(1): {ADDR1}}) is True - assert And([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}}) is False - assert And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) is False - assert And([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) is False - assert And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}}) is True + assert bool(And([Number(1)]).evaluate({Number(0): {ADDR1}})) is False + assert bool(And([Number(1)]).evaluate({Number(1): {ADDR1}})) is True + assert bool(And([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}})) is False + assert bool(And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}})) is False + assert bool(And([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}})) is False + assert bool(And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}})) is True def test_or(): - assert Or([Number(1)]).evaluate({Number(0): {ADDR1}}) is False - assert Or([Number(1)]).evaluate({Number(1): {ADDR1}}) is True - assert Or([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}}) is False - assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) is True - assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) is True - assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}}) is True + assert bool(Or([Number(1)]).evaluate({Number(0): {ADDR1}})) is False + assert bool(Or([Number(1)]).evaluate({Number(1): {ADDR1}})) is True + assert bool(Or([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}})) is False + assert bool(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}})) is True + assert bool(Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}})) is True + assert bool(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}})) is True def test_not(): - assert Not(Number(1)).evaluate({Number(0): {ADDR1}}) is True - assert Not(Number(1)).evaluate({Number(1): {ADDR1}}) is False + assert bool(Not(Number(1)).evaluate({Number(0): {ADDR1}})) is True + assert bool(Not(Number(1)).evaluate({Number(1): {ADDR1}})) is False def test_some(): - assert Some(0, [Number(1)]).evaluate({Number(0): {ADDR1}}) is True - assert Some(1, [Number(1)]).evaluate({Number(0): {ADDR1}}) is False + assert bool(Some(0, [Number(1)]).evaluate({Number(0): {ADDR1}})) is True + assert bool(Some(1, [Number(1)]).evaluate({Number(0): {ADDR1}})) is False - assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}}) is False - assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) is False + assert bool(Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}})) is False + assert bool(Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}})) is False assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate( - {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}} + bool( + Some(2, [Number(1), Number(2), Number(3)]).evaluate( + {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}} + ) ) is True ) assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate( - {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}} + bool( + Some(2, [Number(1), Number(2), Number(3)]).evaluate( + {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}} + ) ) is True ) assert ( - Some(2, [Number(1), Number(2), Number(3)]).evaluate( - {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}, Number(4): {ADDR1}} + bool( + Some(2, [Number(1), Number(2), Number(3)]).evaluate( + {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}, Number(4): {ADDR1}} + ) ) is True ) def test_complex(): - assert True is Or( - [And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])] - ).evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) + assert True is bool( + Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])]).evaluate( + {Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}} + ) + ) - assert False is Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate( - {Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}} + assert False is bool( + Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate( + {Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}} + ) ) def test_range(): # unbounded range, but no matching feature # since the lower bound is zero, and there are zero matches, ok - assert Range(Number(1)).evaluate({Number(2): {}}) is True + assert bool(Range(Number(1)).evaluate({Number(2): {}})) is True # unbounded range with matching feature should always match - assert Range(Number(1)).evaluate({Number(1): {}}) is True - assert Range(Number(1)).evaluate({Number(1): {ADDR1}}) is True + assert bool(Range(Number(1)).evaluate({Number(1): {}})) is True + assert bool(Range(Number(1)).evaluate({Number(1): {ADDR1}})) is True # unbounded max - assert Range(Number(1), min=1).evaluate({Number(1): {ADDR1}}) is True - assert Range(Number(1), min=2).evaluate({Number(1): {ADDR1}}) is False - assert Range(Number(1), min=2).evaluate({Number(1): {ADDR1, ADDR2}}) is True + assert bool(Range(Number(1), min=1).evaluate({Number(1): {ADDR1}})) is True + assert bool(Range(Number(1), min=2).evaluate({Number(1): {ADDR1}})) is False + assert bool(Range(Number(1), min=2).evaluate({Number(1): {ADDR1, ADDR2}})) is True # unbounded min - assert Range(Number(1), max=0).evaluate({Number(1): {ADDR1}}) is False - assert Range(Number(1), max=1).evaluate({Number(1): {ADDR1}}) is True - assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1}}) is True - assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2}}) is True - assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}}) is False + assert bool(Range(Number(1), max=0).evaluate({Number(1): {ADDR1}})) is False + assert bool(Range(Number(1), max=1).evaluate({Number(1): {ADDR1}})) is True + assert bool(Range(Number(1), max=2).evaluate({Number(1): {ADDR1}})) is True + assert bool(Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2}})) is True + assert bool(Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}})) is False # we can do an exact match by setting min==max - assert Range(Number(1), min=1, max=1).evaluate({Number(1): {}}) is False - assert Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1}}) is True - assert Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1, ADDR2}}) is False + assert bool(Range(Number(1), min=1, max=1).evaluate({Number(1): {}})) is False + assert bool(Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1}})) is True + assert bool(Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1, ADDR2}})) is False # bounded range - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {}}) is False - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1}}) is True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2}}) is True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}}) is True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3, ADDR4}}) is False + assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {}})) is False + assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1}})) is True + assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2}})) is True + assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}})) is True + assert bool(Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3, ADDR4}})) is False def test_short_circuit(): - assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) is True + assert bool(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}})) is True # with short circuiting, only the children up until the first satisfied child are captured. assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}, short_circuit=True).children) == 1 @@ -125,8 +135,8 @@ def test_short_circuit(): def test_eval_order(): # base cases. - assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) is True - assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) is True + assert bool(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}})) is True + assert bool(Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}})) is True # with short circuiting, only the children up until the first satisfied child are captured. assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}).children) == 1 diff --git a/tests/test_rules.py b/tests/test_rules.py index b5eab288..9f07f31d 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -40,8 +40,8 @@ ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) def test_rule_ctor(): r = capa.rules.Rule("test rule", capa.rules.FUNCTION_SCOPE, Or([Number(1)]), {}) - assert r.evaluate({Number(0): {ADDR1}}) is False - assert r.evaluate({Number(1): {ADDR2}}) is True + assert bool(r.evaluate({Number(0): {ADDR1}})) is False + assert bool(r.evaluate({Number(1): {ADDR2}})) is True def test_rule_yaml(): @@ -63,10 +63,10 @@ def test_rule_yaml(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(0): {ADDR1}}) is False - assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) is False - assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}) is True - assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}}) is True + assert bool(r.evaluate({Number(0): {ADDR1}})) is False + assert bool(r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}})) is False + assert bool(r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}})) is True + assert bool(r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}})) is True def test_rule_yaml_complex(): @@ -89,8 +89,8 @@ def test_rule_yaml_complex(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) is True - assert r.evaluate({Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) is False + assert bool(r.evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}})) is True + assert bool(r.evaluate({Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}})) is False def test_rule_descriptions(): @@ -167,8 +167,8 @@ def test_rule_yaml_not(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(1): {ADDR1}}) is True - assert r.evaluate({Number(1): {ADDR1}, Number(2): {ADDR1}}) is False + assert bool(r.evaluate({Number(1): {ADDR1}})) is True + assert bool(r.evaluate({Number(1): {ADDR1}, Number(2): {ADDR1}})) is False def test_rule_yaml_count(): @@ -182,9 +182,9 @@ def test_rule_yaml_count(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(100): set()}) is False - assert r.evaluate({Number(100): {ADDR1}}) is True - assert r.evaluate({Number(100): {ADDR1, ADDR2}}) is False + assert bool(r.evaluate({Number(100): set()})) is False + assert bool(r.evaluate({Number(100): {ADDR1}})) is True + assert bool(r.evaluate({Number(100): {ADDR1, ADDR2}})) is False def test_rule_yaml_count_range(): @@ -198,10 +198,10 @@ def test_rule_yaml_count_range(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(100): set()}) is False - assert r.evaluate({Number(100): {ADDR1}}) is True - assert r.evaluate({Number(100): {ADDR1, ADDR2}}) is True - assert r.evaluate({Number(100): {ADDR1, ADDR2, ADDR3}}) is False + assert bool(r.evaluate({Number(100): set()})) is False + assert bool(r.evaluate({Number(100): {ADDR1}})) is True + assert bool(r.evaluate({Number(100): {ADDR1, ADDR2}})) is True + assert bool(r.evaluate({Number(100): {ADDR1, ADDR2, ADDR3}})) is False def test_rule_yaml_count_string(): @@ -215,10 +215,10 @@ def test_rule_yaml_count_string(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({String("foo"): set()}) is False - assert r.evaluate({String("foo"): {ADDR1}}) is False - assert r.evaluate({String("foo"): {ADDR1, ADDR2}}) is True - assert r.evaluate({String("foo"): {ADDR1, ADDR2, ADDR3}}) is False + assert bool(r.evaluate({String("foo"): set()})) is False + assert bool(r.evaluate({String("foo"): {ADDR1}})) is False + assert bool(r.evaluate({String("foo"): {ADDR1, ADDR2}})) is True + assert bool(r.evaluate({String("foo"): {ADDR1, ADDR2, ADDR3}})) is False def test_invalid_rule_feature(): @@ -488,11 +488,11 @@ def test_count_number_symbol(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(2): set()}) is False - assert r.evaluate({Number(2): {ADDR1}}) is True - assert r.evaluate({Number(2): {ADDR1, ADDR2}}) is False - assert r.evaluate({Number(0x100, description="symbol name"): {ADDR1}}) is False - assert r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) is True + assert bool(r.evaluate({Number(2): set()})) is False + assert bool(r.evaluate({Number(2): {ADDR1}})) is True + assert bool(r.evaluate({Number(2): {ADDR1, ADDR2}})) is False + assert bool(r.evaluate({Number(0x100, description="symbol name"): {ADDR1}})) is False + assert bool(r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}})) is True def test_invalid_number(): @@ -574,11 +574,11 @@ def test_count_offset_symbol(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Offset(2): set()}) is False - assert r.evaluate({Offset(2): {ADDR1}}) is True - assert r.evaluate({Offset(2): {ADDR1, ADDR2}}) is False - assert r.evaluate({Offset(0x100, description="symbol name"): {ADDR1}}) is False - assert r.evaluate({Offset(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) is True + assert bool(r.evaluate({Offset(2): set()})) is False + assert bool(r.evaluate({Offset(2): {ADDR1}})) is True + assert bool(r.evaluate({Offset(2): {ADDR1, ADDR2}})) is False + assert bool(r.evaluate({Offset(0x100, description="symbol name"): {ADDR1}})) is False + assert bool(r.evaluate({Offset(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}})) is True def test_invalid_offset(): @@ -973,10 +973,10 @@ def test_property_access(): """ ) ) - assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ): {ADDR1}}) is True + assert bool(r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ): {ADDR1}})) is True - assert r.evaluate({Property("System.IO.FileInfo::Length"): {ADDR1}}) is False - assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.WRITE): {ADDR1}}) is False + assert bool(r.evaluate({Property("System.IO.FileInfo::Length"): {ADDR1}})) is False + assert bool(r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.WRITE): {ADDR1}})) is False def test_property_access_symbol(): @@ -992,8 +992,14 @@ def test_property_access_symbol(): ) ) assert ( - r.evaluate( - {Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {ADDR1}} + bool( + r.evaluate( + { + Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): { + ADDR1 + } + } + ) ) is True )