# rosenpass/marzipan/src/parser.py

import sys

from lark import Lark, Token, Transformer, exceptions, tree

# Taken from page 17 of the ProVerif manual.
# At the moment, we do not reject a ProVerif model that uses reserved words as identifiers,
# because this caused problems with the Lark grammar. We plan to check this in a later
# processing step.
# ProVerif keywords, kept in the order in which they are listed. "equivalence"
# and "implementation" are CryptoVerif-specific and have no grammar rule yet.
reserved_words = """
    among axiom channel choice clauses const def diff do elimtrue else
    equation equivalence event expand fail for forall foreach free fun get
    if implementation in inj-event insert lemma let letfun letproba new
    noninterf noselect not nounif or otherwise out param phase pred proba
    process proof public_vars putbegin query reduc restriction secret select
    set suchthat sync table then type weaksecret yield
""".split()

# Regex literal (delimited by "/") for an identifier that is not a reserved
# word: one negative lookahead per keyword, followed by the identifier
# character class.
ident_regex = "/^{lookaheads}[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/".format(
    lookaheads="".join(map("(?!{}$)".format, reserved_words))
)

# Lark parser for ProVerif models. The grammar text is assembled from three
# string fragments; the entry rule is `start` (declarations, then the
# "process" keyword, then the main process). Whitespace and (* ... *) comments
# are ignored via the %ignore directives at the end of the grammar.
#
# The last fragment is a raw string because the COMMENT regex contains
# backslashes that must reach Lark untouched.
#
# Keyword-introduced constructs ("get", "table", "noninterf") carry their
# keyword explicitly, and settings whose value is an integer use NAT.
proverif_grammar = Lark(
    grammar="""
    PROCESS: "process"
    start: decl* PROCESS process
    YIELD: "yield"
    channel: CHANNEL
    CHANNEL: "channel"
    """
    + "IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/"
    + r"""
    ZERO: "0"
    INFIX: "||"
         | "&&"
         | "="
         | "<>"
         | "<="
         | ">="
         | "<"
         | ">"
    typeid: channel
          | IDENT
    _non_empty_seq{x}: x ("," x)*
    _maybe_empty_seq{x}: [ _non_empty_seq{x} ]

    OPTIONS_FUN_CONST: "data" | "private" | "typeConverter"
    OPTIONS_FUN: OPTIONS_FUN_CONST
    OPTIONS_CONST: OPTIONS_FUN_CONST
    OPTIONS_FREE_REDUC: "private"
    OPTIONS_PRED: "memberOptim" | "block"
    OPTIONS_PROCESS: "precise"
    OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
    OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM
    OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction"
    OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "maxSubset"
    OPTIONS_QUERY: OPTIONS_QUERY_LEMMA_AXIOM | "proveAll"
    OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | /cv_[a-zA-Z0-9À-ÿ'_]*/
    OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml
    OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml
    OPTIONS_TYPE: "fixed" | "bounded" # TODO(blipp): complete this. These are only for compatibility with CryptoVerif and are ignored
    options{idents}: [ "[" _non_empty_seq{idents} "]" ]
    process: ZERO
           | YIELD
           | IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
           | bracketed_process
           | piped_process
           | replicated_process
           | replicated_process_bounds
           | sample_process
           | if_process
           | in_process
           | out_process
           | let_process
           | insert_process
           | get_process
           | event_process
           | phase
           | sync
    bracketed_process: "(" process ")"
    piped_process: process "|" process
    replicated_process: "!" process
    replicated_process_bounds: "!" IDENT "<=" IDENT process
                             | "foreach" IDENT "<=" IDENT "do" process
    sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process]
                  | IDENT "<-R" typeid [";" process]
    let_process: "let" pattern "=" pterm ["in" process [ "else" process ]]
               | IDENT [":" typeid] "<-" pterm [";" process]
               | "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
    if_process: "if" pterm "then" process [ "else" process ]
    in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ]
    get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
    out_process: "out" "(" pterm "," pterm ")" [ ";" process ]
    insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ]
    event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ]
    term: IDENT
          | NAT
          | "(" _maybe_empty_seq{term} ")"
          | IDENT "(" _maybe_empty_seq{term} ")"
          | term ( "+" | "-" ) NAT
          | NAT "+" term
          | term INFIX term
          | "not" "(" term ")"

    query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query]
         | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query]
         | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces.
         | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query]
    lemma: gterm [";" lemma]
         | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma]
         | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma]
    gterm: ident_gterm
         | fun_gterm
         | choice_gterm
         | infix_gterm
         | arith_gterm
         | arith2_gterm
         | event_gterm
         | injevent_gterm
         | implies_gterm
         | paren_gterm
         | sample_gterm
         | let_gterm
    ident_gterm: IDENT
    fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT]
    choice_gterm: "choice" "[" gterm "," gterm "]"
    infix_gterm: gterm INFIX gterm
    arith_gterm: gterm ( "+" | "-" ) NAT
    arith2_gterm: NAT "+" gterm
    event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
    injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
    implies_gterm: gterm "==>" gterm
    paren_gterm: "(" _maybe_empty_seq{gterm} ")"
    sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ]
    let_gterm: "let" IDENT "=" gterm "in" gterm

    gbinding: "!" NAT "=" gterm [";" gbinding]
            | IDENT "=" gterm [";" gbinding]

    nounifdecl: "let" IDENT "=" gformat "in" nounifdecl
              | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]]
    gformat: IDENT
           | "*" IDENT
           | IDENT "(" _maybe_empty_seq{gformat} ")"
           | "choice" "[" gformat "," gformat "]"
           | "not" "(" _maybe_empty_seq{gformat} ")"
           | "new" IDENT [ "[" [ fbinding ] "]" ]
           | "let" IDENT "=" gformat "in" gformat
    fbinding: "!" NAT "=" gformat [";" fbinding]
            | IDENT "=" gformat [";" fbinding]
    nounifoption: "hypothesis"
                | "conclusion"
                | "ignoreAFewTimes"
                | "inductionOn" "=" IDENT
                | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}"

    pterm: IDENT
          | "(" _maybe_empty_seq{pterm} ")"
          | IDENT "(" _maybe_empty_seq{pterm} ")"
          | choice_pterm
          | pterm ("+" | "-") NAT
          | NAT "+" pterm
          | pterm INFIX pterm
          | not_pterm
          | sample_pterm
          | if_pterm
          | let_pterm
          | insert_pterm
          | get_pterm
          | event_pterm
    choice_pterm: "choice[" pterm "," pterm "]"
    if_pterm: "if" pterm "then" pterm [ "else" pterm ]
    not_pterm: "not" "(" pterm ")"
    let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ]
             | IDENT [":" typeid] "<-" pterm ";" pterm
             | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ]
    sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm]
                | IDENT "<-R" typeid [";" pterm]
    insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm
    event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm
    get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ]
    pattern: IDENT [":" typeid]
           | "_" [ ":" typeid ]
           | NAT
           | pattern "+" NAT
           | NAT "+" pattern
           | "(" _maybe_empty_seq{pattern} ")"
           | IDENT "(" _maybe_empty_seq{pattern} ")"
           | "=" pterm
    mayfailterm: term
               | "fail"
    mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")"
    typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ]
    failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ]

    decl: type_decl
        | channel_decl
        | free_decl
        | const_decl
        | fun_decl
        | letfun_decl
        | reduc_decl
        | fun_reduc_decl
        | equation_decl
        | pred_decl
        | table_decl
        | let_decl
        | set_settings_decl
        | event_decl
        | query_decl
        | axiom_decl
        | restriction_decl
        | lemma_decl
        | noninterf_decl
        | weaksecret_decl
        | not_decl
        | select_decl
        | noselect_decl
        | nounif_decl
        | elimtrue_decl
        | clauses_decl
        | module_decl
        #| param_decl
        #| proba_decl
        #| letproba_decl
        #| proof_decl
        #| def_decl
        #| expand_decl

    type_decl: "type" IDENT options{OPTIONS_TYPE} "."
    channel_decl: "channel" _non_empty_seq{IDENT} "."
    free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
    const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
    fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
    letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
    reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "."
    fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "."
    equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "."
    pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "."
    table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "."
    let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "."

    BOOL : "true" | "false"
    NONE: "none"
    FULL: "full"
    ALL: "all"
    FUNC: IDENT
    ignoretype_options: BOOL | ALL | NONE | "attacker"
    boolean_settings_names: "privateCommOnPublicTerms"
                            | "rejectChoiceTrueFalse"
                            | "rejectNoSimplif"
                            | "allowDiffPatterns"
                            | "inductionQueries"
                            | "inductionLemmas"
                            | "movenew"
                            | "movelet"
                            | "stopTerm"
                            | "removeEventsForLemma"
                            | "simpEqAll"
                            | "eqInNames"
                            | "preciseLetExpand"
                            | "expandSimplifyIfCst"
                            | "featureFuns"
                            | "featureNames"
                            | "featurePredicates"
                            | "featureEvents"
                            | "featureTables"
                            | "featureDepth"
                            | "featureWidth"
                            | "simplifyDerivation"
                            | "abbreviateDerivation"
                            | "explainDerivation"
                            | "unifyDerivation"
                            | "reconstructDerivation"
                            | "displayDerivation"
                            | "traceBacktracking"
                            | "interactiveSwapping"
                            | "color"
                            | "verboseLemmas"
                            | "abbreviateClauses"
                            | "removeUselessClausesBeforeDisplay"
                            | "verboseEq"
                            | "verboseDestructors"
                            | "verboseTerm"
                            | "verboseStatistics"
                            | "verboseRules"
                            | "verboseBase"
                            | "verboseRedundant"
                            | "verboseCompleted"
                            | "verboseGoalReachable"

    _decl_pair{name, value}: "set" name "=" value "."

    set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL}

    ignore_types_values: BOOL | "all" | "none" | "attacker"
    simplify_process_values: BOOL | "interactive"
    precise_actions_values: BOOL | "trueWithoutArgsInNames"
    redundant_hyp_elim_values: BOOL | "beginOnly"
    reconstruct_trace_values: BOOL | NAT
    attacker_values: "active" | "passive"
    key_compromise_values: "none" | "approx" | "strict"
    predicates_implementable: "check" | "nocheck"
    application_values: "instantiate" | "full" | "none" | "discard"
    max_values: "none" | NAT
    sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset"
    redundancy_elim_values: "best" | "simple" | "no"
    nounif_ignore_a_few_times_values: "none" | "auto" | "all"
    nounif_ignore_ntimes_values: NAT
    trace_display_values: "short" | "long" | "none"
    verbose_clauses_values: "none" | "explained" | "short"
    set_settings_decl: set_settings_boolean_decl
                | _decl_pair{"ignoreTypes", ignore_types_values}
                | _decl_pair{"simplifyProcess", simplify_process_values}
                | _decl_pair{"preciseActions", precise_actions_values}
                | _decl_pair{"redundantHypElim", redundant_hyp_elim_values}
                | _decl_pair{"reconstructTrace", reconstruct_trace_values}
                | _decl_pair{"attacker", attacker_values}
                | _decl_pair{"keyCompromise", key_compromise_values}
                | _decl_pair{"predicatesImplementable", predicates_implementable}
                | _decl_pair{"saturationApplication", application_values}
                | _decl_pair{"verificationApplication", application_values}
                | _decl_pair{"maxDepth", max_values}
                | _decl_pair{"maxHyp", max_values}
                | _decl_pair{"selFun", sel_fun_values}
                | _decl_pair{"redundancyElim", redundancy_elim_values}
                | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values}
                | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values}
                | _decl_pair{"traceDisplay", trace_display_values}
                | _decl_pair{"verboseClauses", verbose_clauses_values}
                | set_strategy
                | set_symb_order

    _swap_strategy_seq{x}: x ("->" x)*
    set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "."
    _symb_ord_seq{x}: x (">" x)*
    set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "."

    event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "."
    query_decl: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "."

    axiom_decl:       "axiom"       [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."
    restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "."
    lemma_decl:       "lemma"       [ typedecl ";"] lemma options{OPTIONS_LEMMA} "."

    noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "."
    weaksecret_decl: "weaksecret" IDENT "."
    not_decl: "not" [ typedecl ";"] gterm "."

    INT: NAT | "-" NAT
    select_decl:    "select"    [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
    noselect_decl:  "noselect"  [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
    nounif_decl:    "nounif"    [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "."

    elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
    clauses_decl: "clauses" clauses "."

    module_decl: "@module" " " IDENT

    # TODO: finish defining these (comes from Cryptoverif)
    #param_decl: "param" _non_empty_seq{IDENT} options "."
    #proba_decl: "proba" IDENT ["(...)"] options "."
    #letproba_decl: "letproba" IDENT ["(...)"] "= ..." "."
    #proof_decl: "proof" "{" proof "}"
    #def_decl: "def" IDENT "(" _maybe_empty_seq{typeid} ")" "{" decl* "}"
    #expand_decl: "expand" IDENT "(" _maybe_empty_seq{typeid} ")" "."

    nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ]
    equality: term "=" term
            | "let" IDENT "=" term "in" equality
    mayfailequality: IDENT mayfailterm_seq "=" mayfailterm
    eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ]
    clause: term
           | term "->" term
           | term "<->" term
           | term "<=>" term
    clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ]
    mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ]
    NAT: DIGIT+
    phase: "phase" NAT [";" process]
    TAG: IDENT
    sync: "sync" NAT ["[" TAG "]"] [";" process]
    COMMENT:  /\(\*(\*(?!\))|[^*])*\*\)/
    %import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library
    %ignore WS // Disregard spaces in text
    %ignore COMMENT
""",
    debug=True,
    # lexer_callbacks={"COMMENT": comments.append},
)

# COMMENT:  /\(\*(\*(?!\))|[^*])*\*\)/
# COMMENT:  "(*" /(\*(?!\))|[^*])*/  "*)"
# comment: /\(\*(?:(?!\(\*|\*\)).|(?R))*\*\)/

# TODO Open ProVerif compatibility questions
# TODO * does it allow leading zeros for NAT?
# TODO * tag is not defined? is it ident?
# TODO * are spaces between "event" and ":" allowed?
# TODO * spaces between "nat" and "("? "choice" and "["?


def parsertest(input):
    """Parse ``input`` with ``proverif_grammar`` and return the parse tree.

    Nothing is caught here, so any exception raised by the ``parse`` call
    propagates to the caller.
    """
    # Debugging aid: the returned tree can be rendered to an image with
    # tree.pydot__tree_to_png(<tree>, "<name>.png").
    return proverif_grammar.parse(input)


def parse_main(file_path):
    """Read the file at ``file_path`` and run ``parsertest`` on its contents.

    The parse result is discarded and the function returns ``None``. Errors
    from opening or reading the file, or from parsing, are not handled here
    and propagate to the caller.
    """
    # Text mode without an explicit encoding: the platform default is used.
    with open(file_path, "r") as source_file:
        parsertest(source_file.read())