diff --git a/marzipan/README.md b/marzipan/README.md index c8963f4..be5eb80 100644 --- a/marzipan/README.md +++ b/marzipan/README.md @@ -4,5 +4,5 @@ * `src/__init__.py` is the new script * call the old script from the Rosenpass repository's root directory with `./analyze.sh` -* call the new script: +* call the new script from the marzipan directory: * `nix run .# -- analyze $repo` where `$repo` is the absolute(?) path to the root directory of the Rosenpass repository. diff --git a/marzipan/TODO.md b/marzipan/TODO.md index c748275..88c82eb 100644 --- a/marzipan/TODO.md +++ b/marzipan/TODO.md @@ -37,6 +37,7 @@ ## Next Steps * integrate marzipan.awk into Python, somehow + * lark parser: multiline comments, how??? * rewrite marzipan.awk into Python/LARK * define a LARK grammar for marzipan.awk rules * write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv) diff --git a/marzipan/parser.py b/marzipan/parser.py new file mode 100644 index 0000000..6526cc5 --- /dev/null +++ b/marzipan/parser.py @@ -0,0 +1,389 @@ +from lark import Lark, tree, exceptions, Token, Transformer +import sys + +proverif_grammar = Lark( + grammar=""" + PROCESS: "process" + start: decl* PROCESS process + YIELD: "yield" + channel: CHANNEL + CHANNEL: "channel" + """ + + ( + "IDENT: /(?!(among|axiom|channel|choice|clauses|const|def|diff|" + "do|elimtrue|else|equation|equivalence|event|expand|fail|for|forall|" + "foreach|free|fun|get|if|implementation|in|inj-event|insert|lemma|let|" + "letfun|letproba|new|noninterf|noselect|not|nounif|or|otherwise|out|" + "param|phase|pred|proba|process|proof|public vars|putbegin|query|reduc|" + "restriction|secret|select|set|suchthat|sync|table|then|type|weaksecret|" + "yield))[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/" + ) + + """ + ZERO: "0" + INFIX: "||" + | "&&" + | "=" + | "<>" + | "<=" + | ">=" + | "<" + | ">" + typeid: channel + | IDENT + _non_empty_seq{x}: x ("," x)* + _maybe_empty_seq{x}: [ _non_empty_seq{x} ] + + options: [ "[" _non_empty_seq{IDENT} "]" ] + process: ZERO + | YIELD + | IDENT [ "(" _maybe_empty_seq{pterm} ")" ] + | bracketed_process + | piped_process + | replicated_process + | replicated_process_bounds + | sample_process + | if_process + | in_process + | out_process + | let_process + | insert_process + | get_process + | event_process + | phase + | sync + bracketed_process: "(" process ")" + piped_process: process "|" process + replicated_process: "!" process + replicated_process_bounds: "!" IDENT "<=" IDENT process + | "foreach" IDENT "<=" IDENT "do" process + sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process] + | IDENT "<-R" typeid [";" process] + let_process: "let" pattern "=" pterm ["in" pterm [ "else" pterm ]] + | IDENT [":" typeid] "<-" pterm [";" process] + | "let" typedecl "suchthat" pterm options [ "in" process [ "else" process ] ] + if_process: "if" pterm "then" process [ "else" process ] + in_process: "(" pterm "," pattern ")" options [ ";" process ] + get_process: IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options [ "in" process [ "else" process ] ] + out_process: "out" "(" pterm "," pterm ")" [ ";" process ] + insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ] + event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ] + term: IDENT + | NAT + | "(" _maybe_empty_seq{term} ")" + | IDENT "(" _maybe_empty_seq{term} ")" + | term ( "+" | "-" ) NAT + | NAT "+" term + | term INFIX term + | "not" "(" term ")" + + query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query] + | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options [";" query] + | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces. + | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query] + lemma: gterm [";" lemma] + | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma] + | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma] + gterm: ident_gterm + | fun_gterm + | choice_gterm + | infix_gterm + | arith_gterm + | arith2_gterm + | event_gterm + | injevent_gterm + | implies_gterm + | paren_gterm + | sample_gterm + | let_gterm + ident_gterm: IDENT + fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT] + choice_gterm: "choice" "[" gterm "," gterm "]" + infix_gterm: gterm INFIX gterm + arith_gterm: gterm ( "+" | "-" ) NAT + arith2_gterm: NAT "+" gterm + event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT] + injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT] + implies_gterm: gterm "==>" gterm + paren_gterm: "(" _maybe_empty_seq{gterm} ")" + sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ] + let_gterm: "let" IDENT "=" gterm "in" gterm + + gbinding: "!" NAT "=" gterm [";" gbinding] + | IDENT "=" gterm [";" gbinding] + + nounifdecl: "let" IDENT "=" gformat "in" nounifdecl + | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]] + gformat: IDENT + | "*" IDENT + | IDENT "(" _maybe_empty_seq{gformat} ")" + | "choice" "[" gformat "," gformat "]" + | "not" "(" _maybe_empty_seq{gformat} ")" + | "new" IDENT [ "[" [ fbinding ] "]" ] + | "let" IDENT "=" gformat "in" gformat + fbinding: "!" NAT "=" gformat [";" fbinding] + | IDENT "=" gformat [";" fbinding] + nounifoption: "hypothesis" + | "conclusion" + | "ignoreAFewTimes" + | "inductionOn" "=" IDENT + | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}" + + pterm: IDENT + | "(" _maybe_empty_seq{pterm} ")" + | IDENT "(" _maybe_empty_seq{pterm} ")" + | choice_pterm + | pterm ("+" | "-") NAT + | NAT "+" pterm + | pterm INFIX pterm + | not_pterm + | sample_pterm + | if_pterm + | let_pterm + | insert_pterm + | get_pterm + | event_pterm + choice_pterm: "choice[" pterm "," pterm "]" + if_pterm: "if" pterm "then" pterm [ "else" pterm ] + not_pterm: "not" "(" pterm ")" + let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ] + | IDENT [":" typeid] "<-" pterm ";" pterm + | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ] + sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm] + | IDENT "<-R" typeid [";" pterm] + insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm + event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm + get_pterm: IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options [ "in" pterm [ "else" pterm ] ] + pattern: IDENT [":" typeid] + | "_" [ ":" typeid ] + | NAT + | pattern "+" NAT + | NAT "+" pattern + | "(" _maybe_empty_seq{pattern} ")" + | IDENT "(" _maybe_empty_seq{pattern} ")" + | "=" pterm + mayfailterm: term + | "fail" + mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")" + typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ] + failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ] + + decl: type_decl + | channel_decl + | free_decl + | const_decl + | fun_decl + | letfun_decl + | reduc_decl + | fun_reduc_decl + | equation_decl + | pred_decl + | table_decl + | let_decl + | set_settings_decl + | event_decl + | query_decl + | axiom_decl + | restriction_decl + | lemma_decl + | noninterf_decl + | weaksecret_decl + | not_decl + | select_decl + | noselect_decl + | nounif_decl + | elimtrue_decl + | clauses_decl + #| param_decl + #| proba_decl + #| letproba_decl + #| proof_decl + #| def_decl + #| expand_decl + + type_decl: "type" IDENT options "." + channel_decl: "channel" _non_empty_seq{IDENT} "." + free_decl: "free" _non_empty_seq{IDENT} ":" typeid options "." + const_decl: "const" _non_empty_seq{IDENT} ":" typeid options "." + fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options "." + letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "." + reduc_decl: "reduc" eqlist options "." + fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options "." + equation_decl: "equation" eqlist options "." + pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options "." + table_decl: IDENT "(" _maybe_empty_seq{typeid} ")" "." + let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "." + + BOOL : "true" | "false" + NONE: "none" + FULL: "full" + ALL: "all" + FUNC: IDENT + ignoretype_options: BOOL | ALL | NONE | "attacker" + boolean_settings_names: "privateCommOnPublicTerms" + | "rejectChoiceTrueFalse" + | "rejectNoSimplif" + | "allowDiffPatterns" + | "inductionQueries" + | "inductionLemmas" + | "movenew" + | "movelet" + | "stopTerm" + | "removeEventsForLemma" + | "simpEqAll" + | "eqInNames" + | "preciseLetExpand" + | "expandSimplifyIfCst" + | "featureFuns" + | "featureNames" + | "featurePredicates" + | "featureEvents" + | "featureTables" + | "featureDepth" + | "featureWidth" + | "simplifyDerivation" + | "abbreviateDerivation" + | "explainDerivation" + | "unifyDerivation" + | "reconstructDerivation" + | "displayDerivation" + | "traceBacktracking" + | "interactiveSwapping" + | "color" + | "verboseLemmas" + | "abbreviateClauses" + | "removeUselessClausesBeforeDisplay" + | "verboseEq" + | "verboseDestructors" + | "verboseTerm" + | "verboseStatistics" + | "verboseRules" + | "verboseBase" + | "verboseRedundant" + | "verboseCompleted" + | "verboseGoalReachable" + + _decl_pair{name, value}: "set" name "=" value "." + + set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL} + + ignore_types_values: BOOL | "all" | "none" | "attacker" + simplify_process_values: BOOL | "interactive" + precise_actions_values: BOOL | "trueWithoutArgsInNames" + redundant_hyp_elim_values: BOOL | "beginOnly" + reconstruct_trace_values: BOOL | "n" + attacker_values: "active" | "passive" + key_compromise_values: "none" | "approx" | "strict" + predicates_implementable: "check" | "nocheck" + application_values: "instantiate" | "full" | "none" | "discard" + max_values: "none" | "n" + sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset" + redundancy_elim_values: "best" | "simple" | "no" + nounif_ignore_a_few_times_values: "none" | "auto" | "all" + nounif_ignore_ntimes_values: "n" + trace_display_values: "short" | "long" | "none" + verbose_clauses_values: "none" | "explained" | "short" + set_settings_decl: set_settings_boolean_decl + | _decl_pair{"ignoreTypes", ignore_types_values} + | _decl_pair{"simplifyProcess", simplify_process_values} + | _decl_pair{"preciseActions", precise_actions_values} + | _decl_pair{"redundantHypElim", redundant_hyp_elim_values} + | _decl_pair{"reconstructTrace", reconstruct_trace_values} + | _decl_pair{"attacker", attacker_values} + | _decl_pair{"keyCompromise", key_compromise_values} + | _decl_pair{"predicatesImplementable", predicates_implementable} + | _decl_pair{"saturationApplication", application_values} + | _decl_pair{"verificationApplication", application_values} + | _decl_pair{"maxDepth", max_values} + | _decl_pair{"maxHyp", max_values} + | _decl_pair{"selFun", sel_fun_values} + | _decl_pair{"redundancyElim", redundancy_elim_values} + | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values} + | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values} + | _decl_pair{"traceDisplay", trace_display_values} + | _decl_pair{"verboseClauses", verbose_clauses_values} + | set_strategy + | set_symb_order + + _swap_strategy_seq{x}: x ("->" x)* + set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "." + _symb_ord_seq{x}: x (">" x)* + set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "." + + event_decl: IDENT ["(" _maybe_empty_seq{typeid} ")"] "." + query_decl: "query" [ typedecl ";"] query options "." + + axiom_decl: "axiom" [ typedecl ";"] lemma options "." + restriction_decl: "restriction" [ typedecl ";"] lemma options "." + lemma_decl: "lemma" [ typedecl ";"] lemma options "." + + noninterf_decl: [ typedecl ";"] _maybe_empty_seq{nidecl} "." + weaksecret_decl: "weaksecret" IDENT "." + not_decl: "not" [ typedecl ";"] gterm "." + + INT: NAT | "-" NAT + select_decl: "select" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "." + noselect_decl: "noselect" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "." + nounif_decl: "nounif" [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "." + + elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "." + clauses_decl: "clauses" clauses "." + + # TODO: finish defining these (comes from Cryptoverif) + #param_decl: "param" _non_empty_seq{IDENT} options "." + #proba_decl: "proba" IDENT ["(...)"] options "." + #letproba_decl: "letproba" IDENT ["(...)"] "= ..." "." + #proof_decl: "proof" "{" proof "}" + #def_decl: "def" IDENT "(" _maybe_empty_seq{typeid} ")" "{" decl* "}" + #expand_decl: "expand" IDENT "(" _maybe_empty_seq{typeid} ")" "." + + nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ] + equality: term "=" term + | "let" IDENT "=" term "in" equality + mayfailequality: IDENT mayfailterm_seq "=" mayfailterm + eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ] + clause: term + | term "->" term + | term "<->" term + | term "<=>" term + clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ] + mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ] + NAT: DIGIT+ + phase: "phase" NAT [";" process] + TAG: IDENT + sync: "sync" NAT ["[" TAG "]"] [";" process] + COMMENT: "(*" /.*/ + %import common (WORD, DIGIT, NUMBER, WS) // imports from terminal library + %ignore WS // Disregard spaces in text + %ignore COMMENT +""", + debug=True, + # lexer_callbacks={"COMMENT": comments.append}, +) + +# TODO Open ProVerif compatibility questions +# TODO * does it allow leading zeros for NAT? +# TODO * tag is not defined? is it ident? +# TODO * are spaces between "event" and ":" allowed? +# TODO * spaces between "nat" and "("? "choice" and "["? + + +def parsertest(input): + parsetree = proverif_grammar.parse(input) + # tree.pydot__tree_to_png(parsetree, name + ".png") + return parsetree + + +def main(): + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + filename = sys.argv[1] + + with open(filename, "r") as f: + content = f.read() + # print(content) + parsertest(content) + + +if __name__ == "__main__": + main()