# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import textwrap

import pytest

import capa.rules
import capa.engine
import capa.features.insn
import capa.features.common
from capa.rules import Scope


def match(rules, features, va, scope=Scope.FUNCTION):
    """
    run the same inputs through each matching algorithm and cross-check the results.

    the inputs are matched once with `capa.engine.match` and once with `RuleSet.match`
    (using `scope`). every feature and every rule match reported by the engine must
    also be reported by the rule set, with equal locations and an equal number of results.

    returns the (features, matches) pair produced by `capa.engine.match`,
    so that the caller can make their own asserts.
    """
    engine_features, engine_matches = capa.engine.match(rules, features, va)
    ruleset_features, ruleset_matches = capa.rules.RuleSet(rules).match(scope, features, va)

    # each feature seen by the engine must be present, at the same locations, in the rule set results.
    for feature in engine_features:
        assert feature in ruleset_features
        assert engine_features[feature] == ruleset_features[feature]

    # each rule matched by the engine must be matched the same number of times by the rule set.
    for rulename in engine_matches:
        assert rulename in ruleset_matches
        assert len(engine_matches[rulename]) == len(ruleset_matches[rulename])

    return engine_features, engine_matches


@pytest.mark.parametrize(
    "pattern",
    [
        "/test\\.exe/",
        "/hello/i",
        "/foo\\\\bar/",
    ],
)
def test_regex_get_value_str(pattern):
    """Regex.get_value_str() must return the raw pattern without escaping, see #1909."""
    regex = capa.features.common.Regex(pattern)
    rendered = regex.get_value_str()
    assert rendered == pattern


@pytest.mark.xfail(reason="can't have top level NOT")
def test_match_only_not():
    """Exercise a rule whose only feature is a top-level `not`; expected to fail (see the xfail reason)."""
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
                namespace: testns1/testns2
            features:
                - not:
                    - number: 99
        """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    # the extracted number (100) differs from the negated number (99).
    extracted = {capa.features.insn.Number(100): {1, 2}}
    _, matches = match([parsed], extracted, 0x0)
    assert "test rule" in matches


@pytest.mark.xfail(reason="can't have nested NOT")
def test_match_not_not():
    """Exercise a rule made of a doubly nested `not`; expected to fail (see the xfail reason)."""
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
                namespace: testns1/testns2
            features:
                - not:
                    - not:
                        - number: 100
        """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    # the extracted number is the same one wrapped by the two `not` nodes.
    extracted = {capa.features.insn.Number(100): {1, 2}}
    _, matches = match([parsed], extracted, 0x0)
    assert "test rule" in matches


# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_and_unstable():
    """A rule with a root `and` node is indexed by the API feature only, not by the mnemonic."""
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - mnemonic: mov
                    - api: CreateFileW
        """)
    ruleset = capa.rules.RuleSet([capa.rules.Rule.from_yaml(rule_yaml)])
    index: capa.rules.RuleSet._RuleFeatureIndex = ruleset._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]
    indexed_features = index.rules_by_feature

    # there's a single rule, and it's indexed by a single feature
    assert len(indexed_features) == 1
    # and we index by the more uncommon API feature, not the common mnemonic feature
    assert capa.features.insn.API("CreateFileW") in indexed_features

    # the string and bytes-prefix indexes stay empty for this rule
    assert not index.string_rules
    assert not index.bytes_prefix_index


# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_or_unstable():
    """A rule with a root `or` node is indexed by both of its child features."""
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - or:
                    - mnemonic: mov
                    - api: CreateFileW
        """)
    ruleset = capa.rules.RuleSet([capa.rules.Rule.from_yaml(rule_yaml)])
    index: capa.rules.RuleSet._RuleFeatureIndex = ruleset._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]
    indexed_features = index.rules_by_feature

    # there's a single rule, and it's indexed by both features,
    # because they fall under the single root OR node.
    assert len(indexed_features) == 2
    assert capa.features.insn.API("CreateFileW") in indexed_features
    assert capa.features.insn.Mnemonic("mov") in indexed_features

    # the string and bytes-prefix indexes stay empty for this rule
    assert not index.string_rules
    assert not index.bytes_prefix_index


# this test demonstrates the behavior of unstable features that may change before the next major release.
def test_index_features_nested_unstable():
    """With `and: [mnemonic, or: [api, string]]`, the rule is indexed by the API and string features only."""
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - and:
                    - mnemonic: mov
                    - or:
                        - api: CreateFileW
                        - string: foo
        """)
    ruleset = capa.rules.RuleSet([capa.rules.Rule.from_yaml(rule_yaml)])
    index: capa.rules.RuleSet._RuleFeatureIndex = ruleset._feature_indexes_by_scopes[capa.rules.Scope.FUNCTION]
    indexed_features = index.rules_by_feature

    # there's a single rule, and it's indexed by the two uncommon features,
    # not the single common feature.
    assert len(indexed_features) == 2
    assert capa.features.insn.API("CreateFileW") in indexed_features
    assert capa.features.common.String("foo") in indexed_features
    assert capa.features.insn.Mnemonic("mov") not in indexed_features

    # the string and bytes-prefix indexes stay empty for this rule
    assert not index.string_rules
    assert not index.bytes_prefix_index


def test_bytes_prefix_index_correctness():
    """Check that a 16-byte bytes rule matches (or not) as expected across several extracted byte strings."""
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test bytes prefix index
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
        """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)
    rule_name = "test bytes prefix index"

    # (extracted bytes, whether the rule is expected to match)
    cases = [
        # 16 nop bytes - exact match
        (b"\x90" * 16, True),
        # 32 nop bytes - startswith match (first 16 bytes are nops)
        (b"\x90" * 32, True),
        # Different bytes - should not match
        (b"\x00" * 16, False),
        # Bytes shorter than pattern - should not match
        (b"\x90" * 8, False),
    ]

    for extracted, should_match in cases:
        _, matches = match([parsed], {capa.features.common.Bytes(extracted): {0x0}}, 0x0)
        assert (rule_name in matches) == should_match


def test_bytes_prefix_index_collision():
    """
    Two extracted byte strings share their leading bytes ("ABCD"), but only one of them
    starts with the rule's full pattern ("ABCDEFGH"); the rule must still match.
    """
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test bytes prefix collision
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: 41 42 43 44 45 46 47 48
        """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)

    # same first four bytes as the pattern, then diverges
    diverging = capa.features.common.Bytes(b"ABCD1234")
    # begins with the complete pattern, followed by extra bytes
    extending = capa.features.common.Bytes(b"ABCDEFGHzz")

    _, matches = match([parsed], {diverging: {0x0}, extending: {0x1}}, 0x0)
    assert "test bytes prefix collision" in matches


def test_bytes_prefix_index_short_pattern_fallback():
    """A three-byte pattern ("ABC") matches bytes that start with it, and not bytes where it appears at offset one."""
    rule_yaml = textwrap.dedent("""
        rule:
            meta:
                name: test bytes short prefix fallback
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: 41 42 43
        """)
    parsed = capa.rules.Rule.from_yaml(rule_yaml)
    rule_name = "test bytes short prefix fallback"

    # extracted bytes begin with the pattern
    leading = {capa.features.common.Bytes(b"ABCDEF"): {0x0}}
    _, matches = match([parsed], leading, 0x0)
    assert rule_name in matches

    # the pattern is present, but not at the start of the extracted bytes
    shifted = {capa.features.common.Bytes(b"XABCDEF"): {0x0}}
    _, matches = match([parsed], shifted, 0x0)
    assert rule_name not in matches


def test_bytes_prefix_index_mixed_short_and_long_patterns():
    """A rule set holding one short (<4B) and one long (>=4B) bytes pattern rule exercises both code paths."""
    short_rule_text = textwrap.dedent("""
        rule:
            meta:
                name: test short pattern rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: AA BB
        """)
    long_rule_text = textwrap.dedent("""
        rule:
            meta:
                name: test long pattern rule
                scopes:
                    static: function
                    dynamic: process
            features:
                - bytes: CC DD EE FF 11 22 33 44
        """)
    short_rule = capa.rules.Rule.from_yaml(short_rule_text)
    long_rule = capa.rules.Rule.from_yaml(long_rule_text)
    both_rules = [short_rule, long_rule]

    short_name = "test short pattern rule"
    long_name = "test long pattern rule"

    # Both rules match their respective extracted values.
    both_present = {
        capa.features.common.Bytes(b"\xaa\xbb\xcc"): {0x0},
        capa.features.common.Bytes(b"\xcc\xdd\xee\xff\x11\x22\x33\x44\x55"): {0x1},
    }
    _, matches = match(both_rules, both_present, 0x0)
    assert short_name in matches
    assert long_name in matches

    # Only the short rule matches when the long pattern is absent.
    short_only = {capa.features.common.Bytes(b"\xaa\xbb\xcc"): {0x0}}
    _, matches = match(both_rules, short_only, 0x0)
    assert short_name in matches
    assert long_name not in matches

    # Only the long rule matches when the short pattern is absent.
    long_only = {capa.features.common.Bytes(b"\xcc\xdd\xee\xff\x11\x22\x33\x44"): {0x0}}
    _, matches = match(both_rules, long_only, 0x0)
    assert short_name not in matches
    assert long_name in matches


def test_match_no_duplicate_candidate_evaluations():
    """
    A rule reachable through several candidate paths must produce a single match result.

    Two situations are covered, as described by the comments inside the Target Rule:
    global deduplication (a rule that was already evaluated/queued is not queued again)
    and local deduplication (several features triggering the same candidate within one
    pass queue it only once).
    """
    rules = [
        capa.rules.Rule.from_yaml(text)
        for text in (
            textwrap.dedent("""
                rule:
                    meta:
                        name: Dependency Rule 1
                        scopes:
                            static: function
                            dynamic: process
                    features:
                        - number: 100
                """),
            textwrap.dedent("""
                rule:
                    meta:
                        name: Dependency Rule 2
                        scopes:
                            static: function
                            dynamic: process
                        namespace: testns
                    features:
                        - number: 300
                """),
            textwrap.dedent("""
                rule:
                    meta:
                        name: Target Rule
                        scopes:
                            static: function
                            dynamic: process
                    features:
                        - or:
                            # Trigger Case 1 (Global): Target Rule is seeded by number 200,
                            # and also gets triggered later when Dependency Rule 1 matches.
                            - match: Dependency Rule 1
                            - number: 200

                            # Trigger Case 2 (Local): Target Rule depends on both rule name and namespace,
                            # which will try to add it twice in the same iteration when Dependency Rule 2 matches.
                            - and:
                                - match: Dependency Rule 2
                                - match: testns
                """),
        )
    ]

    # Seed all features to trigger both Case 1 and Case 2
    features = {
        capa.features.insn.Number(100): {0x0},
        capa.features.insn.Number(200): {0x0},
        capa.features.insn.Number(300): {0x0},
    }

    ordered_rules = capa.rules.topologically_order_rules(rules)
    _, matches = match(ordered_rules, features, 0x0)

    # all three rules are expected to match
    for expected_name in ("Dependency Rule 1", "Dependency Rule 2", "Target Rule"):
        assert expected_name in matches

    # Ensure Target Rule was evaluated and returned exactly ONCE
    target_results = matches["Target Rule"]
    assert len(target_results) == 1