diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 691850ab..f595f9ac 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,6 +3,10 @@ name: build on: pull_request: branches: [ master ] + paths-ignore: + - 'web/**' + - 'doc/**' + - '**.md' release: types: [edited, published] diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5002ae9d..9aa826ef 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,10 +1,22 @@ name: CI +# tests.yml workflow will run for all changes except: +# any file or directory under web/ or doc/ +# any Markdown (.md) file anywhere in the repository + on: push: branches: [ master ] + paths-ignore: + - 'web/**' + - 'doc/**' + - '**.md' pull_request: branches: [ master ] + paths-ignore: + - 'web/**' + - 'doc/**' + - '**.md' permissions: read-all diff --git a/.github/workflows/deploy-webui.yml b/.github/workflows/web-deploy.yml similarity index 59% rename from .github/workflows/deploy-webui.yml rename to .github/workflows/web-deploy.yml index c1d184d9..c29e7171 100644 --- a/.github/workflows/deploy-webui.yml +++ b/.github/workflows/web-deploy.yml @@ -1,11 +1,10 @@ -name: deploy Capa Explorer Web to Github Pages +name: deploy web to GitHub Pages on: - # Runs on pushes targeting the webui branch push: - branches: [ master ] + branches: [ master, "wb/webui-actions-1" ] paths: - - 'web/explorer/**' + - 'web/**' # Allows to run this workflow manually from the Actions tab workflow_dispatch: @@ -22,10 +21,17 @@ concurrency: cancel-in-progress: true jobs: - deploy: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} + build-landing-page: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: actions/upload-artifact@v4 + with: + name: landing-page + path: './web/public' + + build-explorer: runs-on: ubuntu-latest steps: - name: Checkout @@ -43,24 +49,41 @@ jobs: - name: Install 
dependencies run: npm ci working-directory: ./web/explorer - - name: Lint - run: npm run lint + - name: Generate release bundle + run: npm run build:bundle working-directory: ./web/explorer - - name: Format - run: npm run format:check - working-directory: ./web/explorer - - name: Run unit tests - run: npm run test + - name: Zip release bundle + run: zip -r public/capa-explorer-web.zip capa-explorer-web working-directory: ./web/explorer - name: Build run: npm run build working-directory: ./web/explorer + - uses: actions/upload-artifact@v4 + with: + name: explorer + path: './web/explorer/dist' + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: [build-landing-page, build-explorer] + steps: + - uses: actions/download-artifact@v4 + with: + name: landing-page + path: './public/' + - uses: actions/download-artifact@v4 + with: + name: explorer + path: './public/explorer' - name: Setup Pages uses: actions/configure-pages@v4 - name: Upload artifact uses: actions/upload-pages-artifact@v3 with: - path: './web/explorer/dist' + path: './public' - name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@v4 diff --git a/.gitmodules b/.gitmodules index 079d13dc..93c0a437 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "rules"] path = rules - url = ../capa-rules.git + url = ../../mandiant/capa-rules.git [submodule "tests/data"] path = tests/data - url = ../capa-testfiles.git + url = ../../mandiant/capa-testfiles.git diff --git a/CHANGELOG.md b/CHANGELOG.md index eb1ad4d7..10f622a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,17 @@ Unlock powerful malware analysis with capa's new [VMRay sandbox](https://www.vmr - webui: explore capa analysis results in a web-based UI online and offline #2224 @s-ff - support analyzing DRAKVUF traces #2143 @yelhamer - dynamic: add support for VMRay dynamic sandbox traces #2208 @mike-hunhoff @r-sm2024 @mr-tz +- IDA extractor: 
extract names from dynamically resolved APIs stored in renamed global variables #2201 @Ana06 ### Breaking Changes -### New Rules (2) +### New Rules (5) - nursery/upload-file-to-onedrive jaredswilson@google.com ervinocampo@google.com - data-manipulation/encoding/base64/decode-data-using-base64-via-vbmi-lookup-table still@teamt5.org +- communication/socket/attach-bpf-to-socket-on-linux jakub.jozwiak@mandiant.com +- anti-analysis/anti-av/overwrite-dll-text-section-to-remove-hooks jakub.jozwiak@mandiant.com +- nursery/delete-file-on-linux mehunhoff@google.com - ### Bug Fixes @@ -28,6 +32,8 @@ Unlock powerful malware analysis with capa's new [VMRay sandbox](https://www.vmr ### Development - CI: use macos-12 since macos-11 is deprecated and will be removed on June 28th, 2024 #2173 @mr-tz - CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6 +- CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff +- CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff ### Raw diffs - [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master) diff --git a/README.md b/README.md index f3364c69..be07071c 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,13 @@ capa detects capabilities in executable files. You run it against a PE, ELF, .NET module, shellcode file, or a sandbox report and it tells you what it thinks the program can do. For example, it might suggest that the file is a backdoor, is capable of installing services, or relies on HTTP to communicate. 
-Check out our capa blog posts: -- [Dynamic capa: Exploring Executable Run-Time Behavior with the CAPE Sandbox](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox) -- [capa v4: casting a wider .NET](https://www.mandiant.com/resources/blog/capa-v4-casting-wider-net) (.NET support) -- [ELFant in the Room – capa v3](https://www.mandiant.com/resources/elfant-in-the-room-capa-v3) (ELF support) -- [capa 2.0: Better, Stronger, Faster](https://www.mandiant.com/resources/capa-2-better-stronger-faster) -- [capa: Automatically Identify Malware Capabilities](https://www.mandiant.com/resources/capa-automatically-identify-malware-capabilities) +To interactively inspect capa results in your browser, use the [capa web explorer](https://mandiant.github.io/capa/explorer/). +If you want to inspect or write capa rules, head on over to the [capa-rules repository](https://github.com/mandiant/capa-rules). Otherwise, keep reading. + +Below you will find a list of [our capa blog posts with more details](#blog-posts). + +# example capa output ``` $ capa.exe suspicious.exe @@ -72,16 +72,23 @@ Download stable releases of the standalone capa binaries [here](https://github.c To use capa as a library or integrate with another tool, see [doc/installation.md](https://github.com/mandiant/capa/blob/master/doc/installation.md) for further setup instructions. -For more information about how to use capa, see [doc/usage.md](https://github.com/mandiant/capa/blob/master/doc/usage.md). +# web explorer +The [capa web explorer](https://mandiant.github.io/capa/explorer/) enables you to interactively explore capa results in your web browser. Besides the online version, you can download a standalone HTML file for local offline usage. 
+ +![capa web explorer screenshot](https://github.com/mandiant/capa/blob/master/doc/img/capa_web_explorer.png) + +More details on the web UI are available in the [capa web explorer README](https://github.com/mandiant/capa/blob/master/web/explorer/README.md). # example -In the above sample output, we ran capa against an unknown binary (`suspicious.exe`), -and the tool reported that the program can send HTTP requests, decode data via XOR and Base64, +In the above sample output, we run capa against an unknown binary (`suspicious.exe`), +and the tool reports that the program can send HTTP requests, decode data via XOR and Base64, install services, and spawn new processes. Taken together, this makes us think that `suspicious.exe` could be a persistent backdoor. Therefore, our next analysis step might be to run `suspicious.exe` in a sandbox and try to recover the command and control server. +## detailed results + By passing the `-vv` flag (for very verbose), capa reports exactly where it found evidence of these capabilities. This is useful for at least two reasons: @@ -131,6 +138,7 @@ capa also supports dynamic capabilities detection for multiple sandboxes includi * [DRAKVUF](https://github.com/CERT-Polska/drakvuf-sandbox/) (supported report formats: `.log`, `.log.gz`) * [VMRay](https://www.vmray.com/) (supported report formats: analysis archive `.zip`) + To use this feature, submit your file to a supported sandbox and then download and run capa against the generated report file. This feature enables capa to match capabilities against dynamic and static features that the sandbox captured during execution. 
Here's an example of running capa against a packed file, and then running capa against the CAPE report generated for the same packed file: @@ -220,6 +228,7 @@ $ capa 05be49819139a3fdcdbddbdefd298398779521f3d68daa25275cc77508e42310.json ┕━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ ``` +# capa rules capa uses a collection of rules to identify capabilities within a program. These rules are easy to write, even for those new to reverse engineering. By authoring rules, you can extend the capabilities that capa recognizes. @@ -256,18 +265,28 @@ rule: - property/read: System.Net.Sockets.TcpClient::Client ``` -The [github.com/mandiant/capa-rules](https://github.com/mandiant/capa-rules) repository contains hundreds of standard library rules that are distributed with capa. +The [github.com/mandiant/capa-rules](https://github.com/mandiant/capa-rules) repository contains hundreds of standard rules that are distributed with capa. Please learn to write rules and contribute new entries as you find interesting techniques in malware. +# IDA Pro plugin: capa explorer If you use IDA Pro, then you can use the [capa explorer](https://github.com/mandiant/capa/tree/master/capa/ida/plugin) plugin. capa explorer helps you identify interesting areas of a program and build new capa rules using features extracted directly from your IDA Pro database. +It also uses your local changes to the .idb to extract better features, such as when you rename a global variable that contains a dynamically resolved API address. ![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png) +# Ghidra integration If you use Ghidra, then you can use the [capa + Ghidra integration](/capa/ghidra/) to run capa's analysis directly on your Ghidra database and render the results in Ghidra's user interface. 
+# blog posts +- [Dynamic capa: Exploring Executable Run-Time Behavior with the CAPE Sandbox](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox) +- [capa v4: casting a wider .NET](https://www.mandiant.com/resources/blog/capa-v4-casting-wider-net) (.NET support) +- [ELFant in the Room – capa v3](https://www.mandiant.com/resources/elfant-in-the-room-capa-v3) (ELF support) +- [capa 2.0: Better, Stronger, Faster](https://www.mandiant.com/resources/capa-2-better-stronger-faster) +- [capa: Automatically Identify Malware Capabilities](https://www.mandiant.com/resources/capa-automatically-identify-malware-capabilities) + # further information ## capa - [Installation](https://github.com/mandiant/capa/blob/master/doc/installation.md) diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index e031b7a5..bd70d0fa 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -5,9 +5,11 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Any, Dict, Tuple, Iterator +import re +from typing import Any, Dict, Tuple, Iterator, Optional import idc +import ida_ua import idaapi import idautils @@ -35,9 +37,9 @@ def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]: return ctx["externs_cache"] -def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[Any]: +def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[Tuple[str, str]]: """check instruction for API call""" - info = () + info = None ref = insn.ea # attempt to resolve API calls by following chained thunks to a reasonable depth @@ -52,7 +54,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[A except IndexError: break - info = funcs.get(ref, ()) + info = funcs.get(ref) if info: break @@ -60,8 +62,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[A if not f or not (f.flags & idaapi.FUNC_THUNK): break - if info: - yield info + return info def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: @@ -76,16 +77,39 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) if insn.get_canon_mnem() not in ("call", "jmp"): return - # check calls to imported functions - for api in check_for_api_call(insn, get_imports(fh.ctx)): + # check call to imported functions + api = check_for_api_call(insn, get_imports(fh.ctx)) + if api: # tuple (, , ) for name in capa.features.extractors.helpers.generate_symbols(api[0], api[1]): yield API(name), ih.address + # a call instruction should only call one function, stop if a call to an import is extracted + return - # check calls to extern functions - for api in check_for_api_call(insn, get_externs(fh.ctx)): + # check call to extern functions + api = check_for_api_call(insn, get_externs(fh.ctx)) + if api: # tuple (, , ) yield API(api[1]), ih.address + # a call instruction should only call one function, stop if 
a call to an extern is extracted + return + + # extract dynamically resolved APIs stored in renamed globals (renamed for example using `renimp.idc`) + # examples: `CreateProcessA`, `HttpSendRequestA` + if insn.Op1.type == ida_ua.o_mem: + op_addr = insn.Op1.addr + op_name = idaapi.get_name(op_addr) + # when renaming a global using an API name, IDA assigns it the function type + # ensure we do not extract something wrong by checking that the address has a name and a type + # we could check that the type is a function definition, but that complicates the code + if (not op_name.startswith("off_")) and idc.get_type(op_addr): + # Remove suffix used in repeated names, for example _0 in VirtualFree_0 + match = re.match(r"(.+)_\d+", op_name) + if match: + op_name = match.group(1) + # the global name does not include the DLL name, so we can't extract it + for name in capa.features.extractors.helpers.generate_symbols("", op_name): + yield API(name), ih.address # extract IDA/FLIRT recognized API functions targets = tuple(idautils.CodeRefsFrom(insn.ea, False)) diff --git a/capa/ida/plugin/README.md b/capa/ida/plugin/README.md index 4bf3616c..0af39a4a 100644 --- a/capa/ida/plugin/README.md +++ b/capa/ida/plugin/README.md @@ -81,6 +81,7 @@ can update using the `Settings` button. * Double-click the `Address` column to navigate your Disassembly view to the address of the associated feature * Double-click a result in the `Rule Information` column to expand its children * Select a checkbox in the `Rule Information` column to highlight the address of the associated feature in your Disassembly view +* Reanalyze if you renamed global variables that store dynamically resolved APIs. capa will use these to improve its analysis. 
#### Tips for Rule Generator diff --git a/doc/img/capa_web_explorer.png b/doc/img/capa_web_explorer.png new file mode 100644 index 00000000..ded77c18 Binary files /dev/null and b/doc/img/capa_web_explorer.png differ diff --git a/pyproject.toml b/pyproject.toml index 5c71f161..c2e686b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,7 +137,7 @@ dev = [ "flake8-use-pathlib==0.3.0", "flake8-copyright==0.2.4", "ruff==0.5.6", - "black==24.4.2", + "black==24.8.0", "isort==5.13.2", "mypy==1.11.1", "mypy-protobuf==3.6.0", @@ -148,8 +148,8 @@ dev = [ "types-PyYAML==6.0.8", "types-tabulate==0.9.0.20240106", "types-termcolor==1.1.4", - "types-psutil==5.8.23", - "types_requests==2.32.0.20240602", + "types-psutil==6.0.0.20240621", + "types_requests==2.32.0.20240712", "types-protobuf==5.27.0.20240626", "deptry==0.17.0" ] @@ -158,7 +158,7 @@ build = [ # we want all developer environments to be consistent. # These dependencies are not used in production environments # and should not conflict with other libraries/tooling. 
- "pyinstaller==6.9.0", + "pyinstaller==6.10.0", "setuptools==70.0.0", "build==1.2.1" ] @@ -189,6 +189,7 @@ known_first_party = [ "ida_loader", "ida_nalt", "ida_segment", + "ida_ua", "idaapi", "idautils", "idc", diff --git a/requirements.txt b/requirements.txt index 8420527c..7bc74d07 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ mdurl==0.1.2 msgpack==1.0.8 networkx==3.1 pefile==2023.2.7 -pip==24.1.2 +pip==24.2 protobuf==5.27.3 pyasn1==0.4.8 pyasn1-modules==0.2.8 diff --git a/rules b/rules index 0e2500fa..5b8c8a63 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 0e2500fa8afac0957a616b7b14c7d38ee1beb588 +Subproject commit 5b8c8a63a29c4fed88078278e7f8c8fdfbad709a diff --git a/tests/data b/tests/data index 245d1dbf..ad887bbe 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 245d1dbfed3b15203a77ae9271f284f50d7effa2 +Subproject commit ad887bbed928097655282d2a53b3450575067f3f diff --git a/web/explorer/index.html b/web/explorer/index.html index dfc8c32d..d72238ac 100644 --- a/web/explorer/index.html +++ b/web/explorer/index.html @@ -2,7 +2,7 @@ - + Capa Explorer diff --git a/web/explorer/package.json b/web/explorer/package.json index ce512d48..81e05df4 100644 --- a/web/explorer/package.json +++ b/web/explorer/package.json @@ -6,7 +6,7 @@ "scripts": { "dev": "vite", "build": "vite build", - "build:bundle": "vite build --mode bundle", + "build:bundle": "vite build --mode bundle --outDir=capa-explorer-web", "preview": "vite preview", "test": "vitest", "lint": "eslint . 
--ext .vue,.js,.jsx,.cjs,.mjs --fix --ignore-path .gitignore", diff --git a/web/explorer/src/App.vue b/web/explorer/src/App.vue index a05399de..777ff327 100644 --- a/web/explorer/src/App.vue +++ b/web/explorer/src/App.vue @@ -1,4 +1,5 @@ diff --git a/web/explorer/src/components/NavBar.vue b/web/explorer/src/components/NavBar.vue index 9a466aac..1c19291e 100644 --- a/web/explorer/src/components/NavBar.vue +++ b/web/explorer/src/components/NavBar.vue @@ -1,29 +1,29 @@