From 7b1a62b6bb9884a042765cb988e697cdfa693457 Mon Sep 17 00:00:00 2001 From: Benjamin Lipp Date: Tue, 18 Nov 2025 17:55:32 +0100 Subject: [PATCH] feat: do not try to reject reserved words Co-authored-by: Anja Rabich --- marzipan/TODO.md | 4 ++ marzipan/src/parser.py | 83 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/marzipan/TODO.md b/marzipan/TODO.md index 2cd2db9..7c58899 100644 --- a/marzipan/TODO.md +++ b/marzipan/TODO.md @@ -39,6 +39,10 @@ * integrate marzipan.awk into Python, somehow * options term special cases (c.f. manual page 133, starting with "fun" term) + * complete with CryptoVerif options + * error when trying with: `nix run .# -- parse ../target/proverif/01_secrecy.entry.i.pv` + * `in(C, Cinit_conf(Ssskm, Spsk, Sspkt, ic));` + * ^ * rewrite marzipan.awk into Python/LARK * define a LARK grammar for marzipan.awk rules * write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv) diff --git a/marzipan/src/parser.py b/marzipan/src/parser.py index 0ef8476..89c776b 100644 --- a/marzipan/src/parser.py +++ b/marzipan/src/parser.py @@ -2,6 +2,77 @@ import sys from lark import Lark, Token, Transformer, exceptions, tree +# taken from Page 17 in the ProVerif manual +# At the moment, we do not reject a ProVerif model that uses reserved words as identifier, +# because this caused problems with the LARK grammar. We plan to check this in a later +# processing step. +reserved_words = [ + "among", + "axiom", + "channel", + "choice", + "clauses", + "const", + "def", + "diff", + "do", + "elimtrue", + "else", + "equation", + "equivalence", # no rule yet + "event", + "expand", + "fail", + "for", + "forall", + "foreach", + "free", + "fun", + "get", + "if", + "implementation", # no rule yet + "in", + "inj-event", + "insert", + "lemma", + "let", + "letfun", + "letproba", + "new", + "noninterf", + "noselect", + "not", + "nounif", + "or", + "otherwise", + "out", + "param", + "phase", + "pred", + "proba", + "process", + "proof", + "public_vars", + "putbegin", + "query", + "reduc", + "restriction", + "secret", + "select", + "set", + "suchthat", + "sync", + "table", + "then", + "type", + "weaksecret", + "yield", +] + +ident_regex = ( + "/^" + "".join(f"(?!{w}$)" for w in reserved_words) + "[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/" +) + proverif_grammar = Lark( grammar=""" PROCESS: "process" @@ -10,15 +81,7 @@ proverif_grammar = Lark( channel: CHANNEL CHANNEL: "channel" """ - + ( - "IDENT: /(?!(among|axiom|channel|choice|clauses|const|def|diff|" - "do|elimtrue|else|equation|equivalence|event|expand|fail|for|forall|" - "foreach|free|fun|get|if|implementation|in|inj-event|insert|lemma|let|" - "letfun|letproba|new|noninterf|noselect|not|nounif|or|otherwise|out|" - "param|phase|pred|proba|process|proof|public vars|putbegin|query|reduc|" - "restriction|secret|select|set|suchthat|sync|table|then|type|weaksecret|" - "yield))[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/" - ) + + "IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/" + """ ZERO: "0" INFIX: "||" @@ -326,7 +389,7 @@ proverif_grammar = Lark( _symb_ord_seq{x}: x (">" x)* set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "." - event_decl: IDENT ["(" _maybe_empty_seq{typeid} ")"] "." + event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "." query_decl: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "." axiom_decl: "axiom" [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."