From 78a9909ec6e485743911fa47ff398e03b69329de Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Sun, 23 Jul 2023 02:33:14 +0530 Subject: [PATCH 01/24] Update elffile.py Updated changelog and added link references in comments --- CHANGELOG.md | 1 + capa/features/extractors/elffile.py | 43 +++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64996533..94cc890a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## master (unreleased) ### New Features +- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04 ### Breaking Changes diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index da64a412..aa8bfa65 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -13,7 +13,7 @@ from pathlib import Path from elftools.elf.elffile import ELFFile, SymbolTableSection import capa.features.extractors.common -from capa.features.file import Import, Section +from capa.features.file import Export, Import, Section from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress from capa.features.extractors.base_extractor import FeatureExtractor @@ -21,11 +21,8 @@ from capa.features.extractors.base_extractor import FeatureExtractor logger = logging.getLogger(__name__) -def extract_file_import_names(elf, **kwargs): - # see https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/scripts/readelf.py#L372 - symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections()) if isinstance(s, SymbolTableSection)] - - for _, section in symbol_tables: +def extract_file_export_names(elf, **kwargs): + for _, section in enumerate(elf.iter_sections()): if not isinstance(section, SymbolTableSection): continue @@ -36,10 +33,33 @@ def extract_file_import_names(elf, **kwargs): logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols()) for _, symbol in enumerate(section.iter_symbols()): - if symbol.name and symbol.entry.st_info.type == "STT_FUNC": - # TODO(williballenthin): extract symbol address - # https://github.com/mandiant/capa/issues/1608 - yield Import(symbol.name), FileOffsetAddress(0x0) + # The following conditions are based on the following article + # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html + if symbol.name and symbol.entry.st_info.type in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: + if symbol.entry.st_value != 0 and symbol.entry.st_shndx != "SHN_UNDEF": + # Export symbol + yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value) + + +def extract_file_import_names(elf, **kwargs): + for _, section in enumerate(elf.iter_sections()): + if not isinstance(section, SymbolTableSection): + continue + + if section["sh_entsize"] == 0: + logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name) + continue + + logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols()) + + for _, symbol in enumerate(section.iter_symbols()): + # The following conditions are based on the following article + # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html + if symbol.name and symbol.entry.st_info.type in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: + if symbol.entry.st_value == 0 and symbol.entry.st_shndx == "SHN_UNDEF" and symbol.entry.st_name != 0: + # TODO(williballenthin): extract symbol address + # https://github.com/mandiant/capa/issues/1608 + yield Import(symbol.name), FileOffsetAddress(0x0) def extract_file_section_names(elf, **kwargs): @@ -85,8 +105,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i FILE_HANDLERS = ( - # TODO(williballenthin): implement extract_file_export_names - # https://github.com/mandiant/capa/issues/1607 + extract_file_export_names, extract_file_import_names, extract_file_section_names, extract_file_strings, From 8f826cb92d39960987e3b590fad2a9fcf33f27a8 Mon Sep 17 00:00:00 2001 From: Xusheng Date: Mon, 24 Jul 2023 19:13:10 +0800 Subject: [PATCH 02/24] Fix binja backend stack string detection. Re-enable binja stack string unit test --- CHANGELOG.md | 2 ++ capa/features/extractors/binja/basicblock.py | 5 +++-- tests/test_binja_features.py | 3 --- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64996533..59a1a14c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ ### Bug Fixes +- Fix binja backend stack string detection. [#1473](https://github.com/mandiant/capa/issues/1473) [@xusheng6](https://github.com/xusheng6) + ### capa explorer IDA Pro plugin ### Development diff --git a/capa/features/extractors/binja/basicblock.py b/capa/features/extractors/binja/basicblock.py index 30af516d..568ecc7a 100644 --- a/capa/features/extractors/binja/basicblock.py +++ b/capa/features/extractors/binja/basicblock.py @@ -75,10 +75,11 @@ def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int: return 0 dest = il.params[0] - if dest.operation != MediumLevelILOperation.MLIL_ADDRESS_OF: + if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]: + var = dest.src + else: return 0 - var = dest.src if var.source_type != VariableSourceType.StackVariableSourceType: return 0 diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index 4daaa790..a2f0cd78 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -40,9 +40,6 @@ except ImportError: indirect=["sample", "scope"], ) def test_binja_features(sample, scope, feature, expected): - if feature == capa.features.common.Characteristic("stack string"): - pytest.xfail("skip failing Binja stack string detection temporarily, see #1473") - if isinstance(feature, capa.features.file.Export) and "." in str(feature.value): pytest.xfail("skip Binja unsupported forwarded export feature, see #1646") From 557e83b1dc4dd23ddf31c8cf79ef77ad59ef4270 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jul 2023 14:15:37 +0000 Subject: [PATCH 03/24] build(deps-dev): bump ruff from 0.0.278 to 0.0.280 Bumps [ruff](https://github.com/astral-sh/ruff) from 0.0.278 to 0.0.280. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/BREAKING_CHANGES.md) - [Commits](https://github.com/astral-sh/ruff/compare/v0.0.278...v0.0.280) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a28e244c..fd68ab55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dev = [ "flake8-simplify==0.20.0", "flake8-use-pathlib==0.3.0", "flake8-copyright==0.2.4", - "ruff==0.0.278", + "ruff==0.0.280", "black==23.7.0", "isort==5.11.4", "mypy==1.4.1", From 684b2ded38b5ca4113dfbfb3fb757d020aebbad7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jul 2023 14:15:43 +0000 Subject: [PATCH 04/24] build(deps-dev): bump pyyaml from 6.0 to 6.0.1 Bumps [pyyaml](https://github.com/yaml/pyyaml) from 6.0 to 6.0.1. - [Changelog](https://github.com/yaml/pyyaml/blob/6.0.1/CHANGES) - [Commits](https://github.com/yaml/pyyaml/compare/6.0...6.0.1) --- updated-dependencies: - dependency-name: pyyaml dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a28e244c..cb48f843 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ classifiers = [ ] dependencies = [ "tqdm==4.65.0", - "pyyaml==6.0", + "pyyaml==6.0.1", "tabulate==0.9.0", "colorama==0.4.6", "termcolor==2.3.0", From 3d51b84bd17dfb9994e5557b5231456a632292af Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jul 2023 14:15:50 +0000 Subject: [PATCH 05/24] build(deps-dev): bump types-protobuf from 4.23.0.1 to 4.23.0.2 Bumps [types-protobuf](https://github.com/python/typeshed) from 4.23.0.1 to 4.23.0.2. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-protobuf dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a28e244c..eaea7364 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,7 @@ dev = [ "types-termcolor==1.1.4", "types-psutil==5.8.23", "types_requests==2.31.0.1", - "types-protobuf==4.23.0.1", + "types-protobuf==4.23.0.2", ] build = [ "pyinstaller==5.10.1", From b554eaf5639e44966c20e037fcc634c10081bc21 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jul 2023 14:15:55 +0000 Subject: [PATCH 06/24] build(deps-dev): bump types-requests from 2.31.0.1 to 2.31.0.2 Bumps [types-requests](https://github.com/python/typeshed) from 2.31.0.1 to 2.31.0.2. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-requests dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a28e244c..57c91730 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,7 +92,7 @@ dev = [ "types-tabulate==0.9.0.1", "types-termcolor==1.1.4", "types-psutil==5.8.23", - "types_requests==2.31.0.1", + "types_requests==2.31.0.2", "types-protobuf==4.23.0.1", ] build = [ From c265b1ca96a2d06e5a318bcc4b5d5351e08bfa9b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jul 2023 14:15:59 +0000 Subject: [PATCH 07/24] build(deps-dev): bump types-tabulate from 0.9.0.1 to 0.9.0.3 Bumps [types-tabulate](https://github.com/python/typeshed) from 0.9.0.1 to 0.9.0.3. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-tabulate dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a28e244c..21f8f50d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,7 @@ dev = [ "types-backports==0.1.3", "types-colorama==0.4.15.11", "types-PyYAML==6.0.8", - "types-tabulate==0.9.0.1", + "types-tabulate==0.9.0.3", "types-termcolor==1.1.4", "types-psutil==5.8.23", "types_requests==2.31.0.1", From 5f8280eb09b4e6adb13a345be2bb8845d900f0b1 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 1 Aug 2023 11:16:09 +0000 Subject: [PATCH 08/24] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index a49c174f..6691e9bd 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit a49c174fee5058ca3617a23e782bdcadacb12406 +Subproject commit 6691e9bdbe430f7de84a9ec88911314b44d8a91a From d12185d851bb65e89444dc7076efb54643c32648 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 1 Aug 2023 11:21:02 +0000 Subject: [PATCH 09/24] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index bfcf387b..a34ba30f 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit bfcf387b5b55e65ef02ceb0cc5191ebe49ad5614 +Subproject commit a34ba30f8c5cee29b7539a56038f4976f8883b2b From ed98bb3a577cf926dc4be2e6c280a903d891c79c Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 1 Aug 2023 11:21:32 +0000 Subject: [PATCH 10/24] Sync capa rules submodule --- CHANGELOG.md | 2 +- rules | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59a1a14c..cf18ff9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ ### Breaking Changes -### New Rules (1) +### New Rules (2) - executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com - diff --git a/rules b/rules index 6691e9bd..3f39a454 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 6691e9bdbe430f7de84a9ec88911314b44d8a91a +Subproject commit 3f39a454650c75a26ed7e399af541007b92e00b7 From dd2eef52c309a949422a60190748501551bf9732 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Tue, 1 Aug 2023 22:21:00 +0530 Subject: [PATCH 11/24] Update elffile.py remove enumerate --- capa/features/extractors/elffile.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index aa8bfa65..94ae6618 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -21,8 +21,8 @@ from capa.features.extractors.base_extractor import FeatureExtractor logger = logging.getLogger(__name__) -def extract_file_export_names(elf, **kwargs): - for _, section in enumerate(elf.iter_sections()): +def extract_file_export_names(elf: ELFFile, **kwargs): + for section in elf.iter_sections(): if not isinstance(section, SymbolTableSection): continue @@ -32,7 +32,7 @@ def extract_file_export_names(elf, **kwargs): logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols()) - for _, symbol in enumerate(section.iter_symbols()): + for symbol in section.iter_symbols(): # The following conditions are based on the following article # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html if symbol.name and symbol.entry.st_info.type in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: @@ -41,8 +41,8 @@ def extract_file_export_names(elf, **kwargs): yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value) -def extract_file_import_names(elf, **kwargs): - for _, section in enumerate(elf.iter_sections()): +def extract_file_import_names(elf: ELFFile, **kwargs): + for section in elf.iter_sections(): if not isinstance(section, SymbolTableSection): continue @@ -52,7 +52,7 @@ def extract_file_import_names(elf, **kwargs): logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols()) - for _, symbol in enumerate(section.iter_symbols()): + for symbol in section.iter_symbols(): # The following conditions are based on the following article # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html if symbol.name and symbol.entry.st_info.type in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: From 3bb5754b66d3876912d5d9c3a01baf40cee8f4fd Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Tue, 1 Aug 2023 22:41:11 +0530 Subject: [PATCH 12/24] Update elffile.py --- capa/features/extractors/elffile.py | 33 +++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 94ae6618..535435c3 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -35,10 +35,16 @@ def extract_file_export_names(elf: ELFFile, **kwargs): for symbol in section.iter_symbols(): # The following conditions are based on the following article # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html - if symbol.name and symbol.entry.st_info.type in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: - if symbol.entry.st_value != 0 and symbol.entry.st_shndx != "SHN_UNDEF": - # Export symbol - yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value) + if not symbol.name: + continue + if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: + continue + if symbol.entry.st_value == 0: + continue + if symbol.entry.st_shndx == "SHN_UNDEF": + continue + + yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value) def extract_file_import_names(elf: ELFFile, **kwargs): @@ -55,11 +61,20 @@ def extract_file_import_names(elf: ELFFile, **kwargs): for symbol in section.iter_symbols(): # The following conditions are based on the following article # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html - if symbol.name and symbol.entry.st_info.type in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: - if symbol.entry.st_value == 0 and symbol.entry.st_shndx == "SHN_UNDEF" and symbol.entry.st_name != 0: - # TODO(williballenthin): extract symbol address - # https://github.com/mandiant/capa/issues/1608 - yield Import(symbol.name), FileOffsetAddress(0x0) + if not symbol.name: + continue + if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: + continue + if symbol.entry.st_value != 0: + continue + if symbol.entry.st_shndx != "SHN_UNDEF": + continue + if symbol.entry.st_name == 0: + continue + + # TODO(williballenthin): extract symbol address + # https://github.com/mandiant/capa/issues/1608 + yield Import(symbol.name), FileOffsetAddress(0x0) def extract_file_section_names(elf, **kwargs): From f8c499fb43a19ed67bcb68e308bc66848660dddc Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Wed, 2 Aug 2023 11:52:27 +0530 Subject: [PATCH 13/24] Added test for elf import/export handling --- capa/features/extractors/elffile.py | 6 +-- tests/test_elffile_features.py | 69 +++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 tests/test_elffile_features.py diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 535435c3..9d6de336 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -77,7 +77,7 @@ def extract_file_import_names(elf: ELFFile, **kwargs): yield Import(symbol.name), FileOffsetAddress(0x0) -def extract_file_section_names(elf, **kwargs): +def extract_file_section_names(elf: ELFFile, **kwargs): for section in elf.iter_sections(): if section.name: yield Section(section.name), AbsoluteVirtualAddress(section.header.sh_addr) @@ -89,7 +89,7 @@ def extract_file_strings(buf, **kwargs): yield from capa.features.extractors.common.extract_file_strings(buf) -def extract_file_os(elf, buf, **kwargs): +def extract_file_os(elf: ELFFile, buf, **kwargs): # our current approach does not always get an OS value, e.g. for packed samples # for file limitation purposes, we're more lax here try: @@ -103,7 +103,7 @@ def extract_file_format(**kwargs): yield Format(FORMAT_ELF), NO_ADDRESS -def extract_file_arch(elf, **kwargs): +def extract_file_arch(elf: ELFFile, **kwargs): arch = elf.get_machine_arch() if arch == "x86": yield Arch("i386"), NO_ADDRESS diff --git a/tests/test_elffile_features.py b/tests/test_elffile_features.py new file mode 100644 index 00000000..0b4e2fca --- /dev/null +++ b/tests/test_elffile_features.py @@ -0,0 +1,69 @@ +import io +from pathlib import Path + +from elftools.elf.elffile import ELFFile + +from capa.features.extractors.elffile import extract_file_export_names, extract_file_import_names + +CD = Path(__file__).resolve().parent +SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_" + + +def test_elffile_import_features(): + expected_imports = [ + "memfrob", + "puts", + "__libc_start_main", + "malloc", + "__cxa_finalize", + "memfrob@@GLIBC_2.2.5", + "puts@@GLIBC_2.2.5", + "__libc_start_main@@GLIBC_2.2.5", + "malloc@@GLIBC_2.2.5", + "__cxa_finalize@@GLIBC_2.2.5", + ] + path = Path(SAMPLE_PATH) + elf = ELFFile(io.BytesIO(path.read_bytes())) + # Extract imports + imports = list(extract_file_import_names(elf)) + + # Verify that at least one import was found + assert len(imports) > 0, "No imports were found." + + # Extract the symbol names from the extracted imports + extracted_symbol_names = [imported[0].value for imported in imports] + + # Check if all expected symbol names are found + for symbol_name in expected_imports: + assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports." + + +def test_elffile_export_features(): + expected_exports = [ + "deregister_tm_clones", + "register_tm_clones", + "__do_global_dtors_aux", + "completed.8060", + "__do_global_dtors_aux_fini_array_entry", + "frame_dummy", + "_init", + "__libc_csu_fini", + "_fini", + "__dso_handle", + "_IO_stdin_used", + "__libc_csu_init", + ] + path = Path(SAMPLE_PATH) + elf = ELFFile(io.BytesIO(path.read_bytes())) + # Extract imports + exports = list(extract_file_export_names(elf)) + + # Verify that at least one export was found + assert len(exports) > 0, "No exports were found." + + # Extract the symbol names from the extracted imports + extracted_symbol_names = [exported[0].value for exported in exports] + + # Check if all expected symbol names are found + for symbol_name in expected_exports: + assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports." From 26935ee6e6026a8385cdb38e3284cadcef5adfcd Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Wed, 2 Aug 2023 13:51:51 +0530 Subject: [PATCH 14/24] Update test_elffile_features.py --- tests/test_elffile_features.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_elffile_features.py b/tests/test_elffile_features.py index 0b4e2fca..17fab010 100644 --- a/tests/test_elffile_features.py +++ b/tests/test_elffile_features.py @@ -1,3 +1,10 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. import io from pathlib import Path From d18224eac6fce5203af2a2f8281c2a895caaa812 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 2 Aug 2023 11:03:16 +0000 Subject: [PATCH 15/24] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index a34ba30f..fea6e5f9 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit a34ba30f8c5cee29b7539a56038f4976f8883b2b +Subproject commit fea6e5f90c031abed017fb3e43e12323c764c2c0 From 95ffdf19ff17707113de8409abf2c545ad6cae56 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 2 Aug 2023 11:03:52 +0000 Subject: [PATCH 16/24] Sync capa rules submodule --- CHANGELOG.md | 4 +++- README.md | 2 +- rules | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf18ff9c..6ec36f5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,11 @@ ### Breaking Changes -### New Rules (2) +### New Rules (4) - executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com +- host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com +- host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index 1c08af30..1bf6b159 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-824-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-826-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 3f39a454..bd14f146 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 3f39a454650c75a26ed7e399af541007b92e00b7 +Subproject commit bd14f146d3a52e2c3377e93ee4aed810a3bb892e From 62f50265bca4a12eb5d93a1f3b8de75f7524a860 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Wed, 2 Aug 2023 16:41:24 +0530 Subject: [PATCH 17/24] Resolved Import address --- capa/features/extractors/elffile.py | 33 ++++++++++++++++++++--------- tests/test_elffile_features.py | 5 ----- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 9d6de336..8ed74e87 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -11,6 +11,7 @@ from typing import Tuple, Iterator from pathlib import Path from elftools.elf.elffile import ELFFile, SymbolTableSection +from elftools.elf.relocation import RelocationSection import capa.features.extractors.common from capa.features.file import Export, Import, Section @@ -48,17 +49,15 @@ def extract_file_export_names(elf: ELFFile, **kwargs): def extract_file_import_names(elf: ELFFile, **kwargs): + # Create a dictionary to store symbol names by their index + symbol_names = {} + + # Extract symbol names and store them in the dictionary for section in elf.iter_sections(): if not isinstance(section, SymbolTableSection): continue - if section["sh_entsize"] == 0: - logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name) - continue - - logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols()) - - for symbol in section.iter_symbols(): + for _, symbol in enumerate(section.iter_symbols()): # The following conditions are based on the following article # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html if not symbol.name: @@ -72,9 +71,23 @@ def extract_file_import_names(elf: ELFFile, **kwargs): if symbol.entry.st_name == 0: continue - # TODO(williballenthin): extract symbol address - # https://github.com/mandiant/capa/issues/1608 - yield Import(symbol.name), FileOffsetAddress(0x0) + symbol_names[_] = symbol.name + + for section in elf.iter_sections(): + if not isinstance(section, RelocationSection): + continue + + if section["sh_entsize"] == 0: + logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name) + continue + + logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations()) + + for relocation in section.iter_relocations(): + # Extract the symbol name from the symbol table using the symbol index in the relocation + if relocation["r_info_sym"] not in symbol_names: + continue + yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"]) def extract_file_section_names(elf: ELFFile, **kwargs): diff --git a/tests/test_elffile_features.py b/tests/test_elffile_features.py index 17fab010..7c10bc48 100644 --- a/tests/test_elffile_features.py +++ b/tests/test_elffile_features.py @@ -23,11 +23,6 @@ def test_elffile_import_features(): "__libc_start_main", "malloc", "__cxa_finalize", - "memfrob@@GLIBC_2.2.5", - "puts@@GLIBC_2.2.5", - "__libc_start_main@@GLIBC_2.2.5", - "malloc@@GLIBC_2.2.5", - "__cxa_finalize@@GLIBC_2.2.5", ] path = Path(SAMPLE_PATH) elf = ELFFile(io.BytesIO(path.read_bytes())) From 04fbcbbbd3de5d579fbedd1fe74149ef882ade73 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 2 Aug 2023 12:36:42 +0000 Subject: [PATCH 18/24] linter: skip native API check for NtProtectVirtualMemory closes #1675 --- CHANGELOG.md | 3 ++- scripts/lint.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f285d59e..5735ccf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,8 @@ ### Bug Fixes -- Fix binja backend stack string detection. [#1473](https://github.com/mandiant/capa/issues/1473) [@xusheng6](https://github.com/xusheng6) +- Fix binja backend stack string detection. #1473 @xusheng6 +- linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin ### capa explorer IDA Pro plugin diff --git a/scripts/lint.py b/scripts/lint.py index 847f574a..f155c89b 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -569,6 +569,7 @@ class FeatureNtdllNtoskrnlApi(Lint): "ZwCreateProcess", "ZwCreateUserProcess", "RtlCreateUserProcess", + "NtProtectVirtualMemory", ): # ntoskrnl.exe does not export these routines continue From 149983dced86182ab7643d7bd330d34731ed3a21 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 2 Aug 2023 12:42:03 +0000 Subject: [PATCH 19/24] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index bd14f146..7685a232 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit bd14f146d3a52e2c3377e93ee4aed810a3bb892e +Subproject commit 7685a232d94acbe7e69addb8bd89d752c9fa27a2 From ab5c8b11290e1ec0a72eb216a451cc2ba131f8c3 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 2 Aug 2023 12:49:17 +0000 Subject: [PATCH 20/24] linter: skip native API check for NtEnumerateSystemEnvironmentValuesEx --- scripts/lint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/lint.py b/scripts/lint.py index f155c89b..c243ca16 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -570,6 +570,7 @@ class FeatureNtdllNtoskrnlApi(Lint): "ZwCreateUserProcess", "RtlCreateUserProcess", "NtProtectVirtualMemory", + "NtEnumerateSystemEnvironmentValuesEx", ): # ntoskrnl.exe does not export these routines continue From a538a7bbab7d8d01749df86e8b18b3144d6d4427 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 2 Aug 2023 12:54:03 +0000 Subject: [PATCH 21/24] linter: skip native API check for more UEFI routines --- scripts/lint.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/lint.py b/scripts/lint.py index c243ca16..85de2bfc 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -571,6 +571,8 @@ class FeatureNtdllNtoskrnlApi(Lint): "RtlCreateUserProcess", "NtProtectVirtualMemory", "NtEnumerateSystemEnvironmentValuesEx", + "NtQuerySystemEnvironmentValueEx", + "NtQuerySystemEnvironmentValue", ): # ntoskrnl.exe does not export these routines continue @@ -581,6 +583,7 @@ class FeatureNtdllNtoskrnlApi(Lint): "KeStackAttachProcess", "ObfDereferenceObject", "KeUnstackDetachProcess", + "ExGetFirmwareEnvironmentVariable", ): # ntdll.dll does not export these routines continue From 13533074ea83db3c6a2280a2b3a5c6e0aca1e590 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 2 Aug 2023 13:01:15 +0000 Subject: [PATCH 22/24] devcontainer: install pre-commit hooks --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index d76fc748..c8444ed3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -41,7 +41,7 @@ // "forwardPorts": [], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev]", + "postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev] && pre-commit install", // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. "remoteUser": "vscode", From ee68031d1901aac446b99c121b7cf07faea37634 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Sat, 5 Aug 2023 16:37:46 +0000 Subject: [PATCH 23/24] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index fea6e5f9..bd7d7d0e 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit fea6e5f90c031abed017fb3e43e12323c764c2c0 +Subproject commit bd7d7d0ee075db1ecae501e5255f2e89fde4cf57 From 2c8f99143a20d57112efe0059506ddc4da68f0b1 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Sat, 5 Aug 2023 16:40:13 +0000 Subject: [PATCH 24/24] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index bd7d7d0e..71b5378d 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit bd7d7d0ee075db1ecae501e5255f2e89fde4cf57 +Subproject commit 71b5378dbbe426dd318143f630c5f249c42830a1