mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 07:40:38 -08:00
Compare commits
1 Commits
mr/library
...
wb/library
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
03ce40e781 |
0
capa/analysis/__init__.py
Normal file
0
capa/analysis/__init__.py
Normal file
193
capa/analysis/libraries.py
Normal file
193
capa/analysis/libraries.py
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
"""
|
||||||
|
further requirements:
|
||||||
|
- nltk
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import collections
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import rich
|
||||||
|
from rich.text import Text
|
||||||
|
|
||||||
|
import capa.analysis.strings
|
||||||
|
import capa.features.extractors.strings
|
||||||
|
from capa.analysis.strings import LibraryStringDatabase
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_strings(buf, n=4):
    """yield the ASCII strings, then the UTF-16 ("unicode") strings, found in `buf`.

    args:
      buf: the raw bytes to scan.
      n: minimum string length, passed through to both extractors.
    """
    extractors = (
        capa.features.extractors.strings.extract_ascii_strings,
        capa.features.extractors.strings.extract_unicode_strings,
    )
    for extract in extractors:
        yield from extract(buf, n=n)
|
||||||
|
|
||||||
|
|
||||||
|
def prune_databases(dbs: list[LibraryStringDatabase], n=8):
    """remove less trustworthy database entries, in place.

    such as:
      - those found in multiple databases
      - those that are English words
      - those that are too short
      - Windows API and DLL names

    args:
      dbs: the databases to prune; each `metadata_by_string` mapping is mutated.
      n: minimum string length; strings shorter than this are removed.
    """

    # TODO: consider applying these filters directly to the persisted databases, not at load time.

    winapi = capa.analysis.strings.WindowsApiStringDatabase.from_defaults()

    try:
        from nltk.corpus import words as nltk_words

        # nltk loads corpora lazily: when the dataset is missing, the import succeeds
        # and LookupError is raised upon first access. so access the corpus here,
        # inside the try block, to be able to trigger the download below.
        words = set(nltk_words.words())
    except (ImportError, LookupError):
        # one-time download of dataset.
        # this probably doesn't work well for embedded use.
        import nltk

        nltk.download("words")
        from nltk.corpus import words as nltk_words

        words = set(nltk_words.words())

    counter = collections.Counter()
    to_remove = set()
    for db in dbs:
        for string in db.metadata_by_string:
            # count in how many databases each string is found.
            counter[string] += 1

            if (
                string in words
                or len(string) < n
                or string in winapi.api_names
                or string in winapi.dll_names
            ):
                to_remove.add(string)

    # remove strings that are seen in more than one database
    to_remove.update(string for string, count in counter.items() if count > 1)

    for db in dbs:
        for string in to_remove:
            db.metadata_by_string.pop(string, None)
|
||||||
|
|
||||||
|
|
||||||
|
def open_ida(input_path: Path):
    """open a copy of `input_path` as an IDA database and wait for auto-analysis to finish.

    the file is first copied into a fresh temporary directory (prefix "ida-"),
    and that copy is what gets opened.
    """
    import tempfile

    import idapro

    workdir = Path(tempfile.mkdtemp(prefix="ida-"))
    sample_copy = workdir / input_path.name
    sample_copy.write_bytes(input_path.read_bytes())
    # resource leak: we should delete this upon exit

    idapro.enable_console_messages(False)
    idapro.open_database(str(sample_copy.absolute()), run_auto_analysis=True)

    # NOTE(review): ida_auto is imported only after idapro has opened the database;
    # presumably the order matters - keep it unless confirmed otherwise.
    import ida_auto

    ida_auto.auto_wait()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """report the libraries, and the library functions, found in a file via its strings.

    the path of the file to analyze is taken from the first command line argument.

    first, the strings of the file are matched against the pruned default databases
    and the matching libraries are printed. if nothing matches, return early.
    otherwise, open the file with IDA and print, for each function that is neither
    a thunk nor flagged as library code by IDA, the database strings it references.
    """
    if len(sys.argv) < 2:
        # fail with a usage message rather than an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <input file>")

    logging.basicConfig(level=logging.DEBUG)

    # use n=8 to ignore common words
    N = 8

    input_path = Path(sys.argv[1])
    input_buf = input_path.read_bytes()

    dbs = capa.analysis.strings.get_default_databases()
    prune_databases(dbs, n=N)

    strings_by_library = collections.defaultdict(set)
    # reuse the buffer read above, rather than reading the file a second time.
    for string in extract_strings(input_buf, n=N):
        for db in dbs:
            if metadata := db.metadata_by_string.get(string.s):
                strings_by_library[metadata.library_name].add(string.s)

    console = rich.get_console()
    console.print("found libraries:", style="bold")
    # libraries with the most matching strings come first.
    for library, strings in sorted(strings_by_library.items(), key=lambda p: len(p[1]), reverse=True):
        console.print(f" - [b]{library}[/] ({len(strings)} strings)")

        # show at most ten example strings per library.
        for string in sorted(strings)[:10]:
            console.print(f" - {string}", markup=False, style="grey37")

        if len(strings) > 10:
            console.print(" ...", style="grey37")

    if not strings_by_library:
        console.print(" (none)", style="grey37")
        # since we're not going to find any strings
        # return early and don't do IDA analysis
        return

    # TODO: ensure there are XXX matches for each library, or ignore those entries

    open_ida(input_path)

    # these modules are imported only after open_ida() has run.
    import idaapi
    import idautils
    import ida_funcs
    import capa.features.extractors.ida.helpers as ida_helpers

    strings_by_function = collections.defaultdict(set)
    for ea in idautils.Functions():
        f = idaapi.get_func(ea)

        # ignore library functions and thunk functions as identified by IDA
        if f.flags & idaapi.FUNC_THUNK:
            continue
        if f.flags & idaapi.FUNC_LIB:
            continue

        for bb in ida_helpers.get_function_blocks(f):
            for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
                ref = ida_helpers.find_data_reference_from_insn(insn)
                if ref == insn.ea:
                    # the helper returned the instruction's own address; skip it.
                    continue

                string = ida_helpers.find_string_at(ref)
                if not string:
                    continue

                if any(db.metadata_by_string.get(string) for db in dbs):
                    strings_by_function[ea].add(string)

    # ensure there are at least XXX functions renamed, or ignore those entries

    console.print("functions:", style="bold")
    for function, strings in sorted(strings_by_function.items()):
        if strings:
            name = ida_funcs.get_func_name(function)

            console.print(f" [b]{name}[/]@{function:08x}:")

            for string in strings:
                for db in dbs:
                    if metadata := db.metadata_by_string.get(string):
                        location = Text(
                            f"{metadata.library_name}@{metadata.library_version}::{metadata.function_name}",
                            style="grey37",
                        )
                        console.print(" - ", location, ": ", string.rstrip())

            # TODO: ensure there aren't conflicts among the matches

            console.print()

    function_count = sum(1 for _ in idautils.Functions())
    console.print(f"found {len(strings_by_function)} library functions across {function_count} functions")
|
||||||
|
|
||||||
|
|
||||||
|
# run the analysis only when this module is executed as a script.
if __name__ == "__main__":
    main()
|
||||||
95
capa/analysis/strings/__init__.py
Normal file
95
capa/analysis/strings/__init__.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
import gzip
|
||||||
|
import pathlib
|
||||||
|
from typing import Dict, Sequence
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import msgspec
|
||||||
|
|
||||||
|
|
||||||
|
class LibraryString(msgspec.Struct):
    """a string found in a library, along with metadata about where it comes from.

    one instance is decoded from each JSON document (line) of a database file.
    """

    # the string itself, e.g. "1.0.8, 13-Jul-2019"
    string: str
    # name of the library, e.g. "bzip2"
    library_name: str
    # version of the library, e.g. "1.0.8#3"
    library_version: str
    # e.g. "CMakeFiles/bz2.dir/bzlib.c.obj"
    file_path: str | None = None
    # e.g. "BZ2_bzlibVersion"
    function_name: str | None = None
    line_number: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LibraryStringDatabase:
    """collection of library strings, indexed by the string itself."""

    metadata_by_string: Dict[str, LibraryString]

    def __len__(self) -> int:
        return len(self.metadata_by_string)

    @classmethod
    def from_file(cls, path: pathlib.Path) -> "LibraryStringDatabase":
        """load a database from a gzip-compressed JSONL file (one `LibraryString` per line)."""
        decoder = msgspec.json.Decoder(type=LibraryString)
        raw_lines = gzip.decompress(path.read_bytes()).split(b"\n")

        # empty lines, such as the one after a trailing newline, are skipped.
        entries = (decoder.decode(raw) for raw in raw_lines if raw)

        # when the same string is found more than once, the last entry wins.
        index: Dict[str, LibraryString] = {entry.string: entry for entry in entries}
        return cls(metadata_by_string=index)
|
||||||
|
|
||||||
|
|
||||||
|
# file names of the default open source library string databases,
# each one a gzip-compressed JSONL file.
DEFAULT_FILENAMES = (
    "brotli.jsonl.gz",
    "bzip2.jsonl.gz",
    "cryptopp.jsonl.gz",
    "curl.jsonl.gz",
    "detours.jsonl.gz",
    "jemalloc.jsonl.gz",
    "jsoncpp.jsonl.gz",
    "kcp.jsonl.gz",
    "liblzma.jsonl.gz",
    "libsodium.jsonl.gz",
    "libpcap.jsonl.gz",
    "mbedtls.jsonl.gz",
    "openssl.jsonl.gz",
    "sqlite3.jsonl.gz",
    "tomcrypt.jsonl.gz",
    "wolfssl.jsonl.gz",
    "zlib.jsonl.gz",
)

# paths of all default databases, relative to the directory of this module:
# the open source databases under data/oss, plus the msvc_v143 database under data/crt.
DEFAULT_PATHS = tuple(
    pathlib.Path(__file__).parent / "data" / "oss" / filename for filename in DEFAULT_FILENAMES
) + (pathlib.Path(__file__).parent / "data" / "crt" / "msvc_v143.jsonl.gz",)
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_databases() -> Sequence[LibraryStringDatabase]:
    """load every database listed in `DEFAULT_PATHS`, in that order."""
    databases = []
    for db_path in DEFAULT_PATHS:
        databases.append(LibraryStringDatabase.from_file(db_path))
    return databases
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class WindowsApiStringDatabase:
    """names of Windows DLLs and Windows API functions."""

    dll_names: set[str]
    api_names: set[str]

    def __len__(self) -> int:
        """total number of names, DLL names plus API names."""
        return len(self.dll_names) + len(self.api_names)

    @staticmethod
    def _read_names(path: pathlib.Path) -> set[str]:
        """read the non-empty lines of a gzip-compressed, UTF-8 encoded text file."""
        text = gzip.decompress(path.read_bytes()).decode("utf-8")
        return {line for line in text.splitlines() if line}

    @classmethod
    def from_dir(cls, path: pathlib.Path) -> "WindowsApiStringDatabase":
        """load the database from a directory containing `dlls.txt.gz` and `apis.txt.gz`.

        each file is gzip-compressed, UTF-8 encoded text with one name per line;
        empty lines are ignored.
        """
        return cls(
            dll_names=cls._read_names(path / "dlls.txt.gz"),
            api_names=cls._read_names(path / "apis.txt.gz"),
        )

    @classmethod
    def from_defaults(cls) -> "WindowsApiStringDatabase":
        """load the database from the `data/winapi` directory next to this module."""
        return cls.from_dir(pathlib.Path(__file__).parent / "data" / "winapi")
|
||||||
|
|
||||||
BIN
capa/analysis/strings/data/crt/msvc_v143.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/crt/msvc_v143.jsonl.gz
Normal file
Binary file not shown.
3
capa/analysis/strings/data/oss/.gitignore
vendored
Normal file
3
capa/analysis/strings/data/oss/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
*.csv
|
||||||
|
*.jsonl
|
||||||
|
*.jsonl.gz
|
||||||
BIN
capa/analysis/strings/data/oss/brotli.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/brotli.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/bzip2.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/bzip2.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/cryptopp.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/cryptopp.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/curl.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/curl.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/detours.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/detours.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/jemalloc.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/jemalloc.jsonl.gz
Normal file
Binary file not shown.
52
capa/analysis/strings/data/oss/jh_to_qs.py
Normal file
52
capa/analysis/strings/data/oss/jh_to_qs.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
"""
|
||||||
|
convert from a jh CSV file to a .jsonl.gz OpenSourceString database.
|
||||||
|
|
||||||
|
the jh file looks like:
|
||||||
|
|
||||||
|
# triplet,compiler,library,version,profile,path,function,type,value
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0x00000100
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0xfffffff8
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0xfffffffe
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,api,BZ2_bzCompressInit
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,api,handle_compress
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffDecompress,number,0x0000fa90
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffDecompress,number,0xfffffff8
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffDecompress,number,0xfffffff9
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffDecompress,number,0xfffffffd
|
||||||
|
|
||||||
|
jh is found here: https://github.com/williballenthin/lancelot/blob/master/bin/src/bin/jh.rs
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
import msgspec
|
||||||
|
|
||||||
|
from capa.analysis.strings import LibraryString
|
||||||
|
|
||||||
|
csv_path = pathlib.Path(sys.argv[1])
out = sys.stdout.buffer
for row in csv_path.read_text().split("\n"):
    # skip empty lines and comment lines, such as the "# triplet,compiler,..." header.
    if not row or row.startswith("#"):
        continue

    # split off the seven leading fields; the remainder holds the feature type and value.
    triplet, compiler, library, version, profile, path, function, rest = row.split(",", 7)
    kind, _, value = rest.partition(",")

    # we only want string features, not numbers or API names.
    if kind != "string":
        continue

    # values that start with a double quote are decoded as JSON strings.
    if value.startswith('"'):
        value = json.loads(value)

    entry = LibraryString(
        string=value,
        library_name=library,
        library_version=version,
        file_path=path,
        function_name=function,
    )

    # emit one JSON document per line (JSONL).
    out.write(msgspec.json.encode(entry))
    out.write(b"\n")
|
||||||
BIN
capa/analysis/strings/data/oss/jsoncpp.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/jsoncpp.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/kcp.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/kcp.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/liblzma.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/liblzma.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/libpcap.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/libpcap.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/libsodium.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/libsodium.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/mbedtls.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/mbedtls.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/openssl.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/openssl.jsonl.gz
Normal file
Binary file not shown.
99
capa/analysis/strings/data/oss/readme.md
Normal file
99
capa/analysis/strings/data/oss/readme.md
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# Strings from Open Source libraries
|
||||||
|
|
||||||
|
This directory contains databases of strings extracted from open source software.
|
||||||
|
capa uses these databases to ignore functions that are likely library code.
|
||||||
|
|
||||||
|
There is one file for each database. Each database is a gzip-compressed, JSONL (one JSON document per line) file.
|
||||||
|
The JSON document looks like this:
|
||||||
|
|
||||||
|
string: "1.0.8, 13-Jul-2019"
|
||||||
|
library_name: "bzip2"
|
||||||
|
library_version: "1.0.8#3"
|
||||||
|
file_path: "CMakeFiles/bz2.dir/bzlib.c.obj"
|
||||||
|
function_name: "BZ2_bzlibVersion"
|
||||||
|
line_number: null
|
||||||
|
|
||||||
|
The following databases were extracted via the vcpkg & jh technique:
|
||||||
|
|
||||||
|
- brotli 1.0.9#5
|
||||||
|
- bzip2 1.0.8#3
|
||||||
|
- cryptopp 8.7.0
|
||||||
|
- curl 7.86.0#1
|
||||||
|
- detours 4.0.1#7
|
||||||
|
- jemalloc 5.3.0#1
|
||||||
|
- jsoncpp 1.9.5
|
||||||
|
- kcp 1.7
|
||||||
|
- liblzma 5.2.5#6
|
||||||
|
- libsodium 1.0.18#8
|
||||||
|
- libpcap 1.10.1#3
|
||||||
|
- mbedtls 2.28.1
|
||||||
|
- openssl 3.0.7#1
|
||||||
|
- sqlite3 3.40.0#1
|
||||||
|
- tomcrypt 1.18.2#2
|
||||||
|
- wolfssl 5.5.0
|
||||||
|
- zlib 1.2.13
|
||||||
|
|
||||||
|
This code was originally developed in FLOSS and imported into capa.
|
||||||
|
|
||||||
|
## The vcpkg & jh technique
|
||||||
|
|
||||||
|
Major steps:
|
||||||
|
|
||||||
|
1. build static libraries via vcpkg
|
||||||
|
2. extract features via jh
|
||||||
|
3. convert to JSONL format with `jh_to_qs.py`
|
||||||
|
4. compress with gzip
|
||||||
|
|
||||||
|
### Build static libraries via vcpkg
|
||||||
|
|
||||||
|
[vcpkg](https://vcpkg.io/en/) is a free C/C++ package manager for acquiring and managing libraries.
|
||||||
|
We use it to easily build common open source libraries, like zlib.
|
||||||
|
Use the triplet `x64-windows-static` to build static archives (.lib files that are AR archives containing COFF object files):
|
||||||
|
|
||||||
|
```console
|
||||||
|
PS > C:\vcpkg\vcpkg.exe install --triplet x64-windows-static zlib
|
||||||
|
```
|
||||||
|
|
||||||
|
### Extract features via jh
|
||||||
|
|
||||||
|
[jh](https://github.com/williballenthin/lancelot/blob/master/bin/src/bin/jh.rs)
|
||||||
|
is a lancelot-based utility that parses AR archives containing COFF object files,
|
||||||
|
reconstructs their control flow, finds functions, and extracts features.
|
||||||
|
jh extracts numbers, API calls, and strings; we are only interested in the string features.
|
||||||
|
|
||||||
|
For each feature, jh emits a CSV line with the fields
|
||||||
|
- target triplet
|
||||||
|
- compiler
|
||||||
|
- library
|
||||||
|
- version
|
||||||
|
- build profile
|
||||||
|
- path
|
||||||
|
- function
|
||||||
|
- feature type
|
||||||
|
- feature value
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```csv
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0x00000100
|
||||||
|
```
|
||||||
|
|
||||||
|
For example, to invoke jh:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ ~/lancelot/target/release/jh x64-windows-static msvc143 zlib 1.2.13 release /mnt/c/vcpkg/installed/x64-windows-static/lib/zlib.lib > ~/flare-floss/floss/qs/db/data/oss/zlib.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
### Convert to OSS database format
|
||||||
|
|
||||||
|
We use the script `jh_to_qs.py` to convert these CSV lines into a JSONL file prepared for FLOSS:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ python3 jh_to_qs.py zlib.csv > zlib.jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
These files are then gzip'd:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ gzip -c zlib.jsonl > zlib.jsonl.gz
|
||||||
|
```
|
||||||
BIN
capa/analysis/strings/data/oss/sqlite3.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/sqlite3.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/tomcrypt.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/tomcrypt.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/wolfssl.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/wolfssl.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/zlib.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/zlib.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/winapi/apis.txt.gz
Normal file
BIN
capa/analysis/strings/data/winapi/apis.txt.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/winapi/dlls.txt.gz
Normal file
BIN
capa/analysis/strings/data/winapi/dlls.txt.gz
Normal file
Binary file not shown.
Reference in New Issue
Block a user