From 8b229c97193ccb24c46b55ac3869868884d3bd39 Mon Sep 17 00:00:00 2001 From: itamar Date: Mon, 9 Mar 2026 22:32:22 +0200 Subject: push --- file.lean | 406 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100644 file.lean (limited to 'file.lean') diff --git a/file.lean b/file.lean new file mode 100644 index 0000000..214d8d1 --- /dev/null +++ b/file.lean @@ -0,0 +1,406 @@ +namespace Hparser + +structure Parser (T: Type) : Type where + parse : (String -> List (T × String)) + +open Parser + +#check Parser.mk (λ x => [('a',x)]) + + +/- 3 Parser primitives: result, zero, item -/ + +def result {T : Type } : T -> Parser T := + λ v => Parser.mk $ λ inp => [(v, inp)] + +def zero {T: Type } : Parser T := + Parser.mk $ λ inp => [] + +def item : Parser Char := + Parser.mk λ x => + match String.data x with + | [] => [] + | x::xs => [(x,String.mk xs)] + +#eval (result 'f').parse "hfaiweh" +#eval item.parse "awefawe" + + +def concatMap {S T: Type} : (S -> List T) -> List S -> List T := +λ f s => + match s with + | [] => [] + | x::xs => (f x)++(concatMap f xs) + +#eval String.append "d" "da" + +--demo of how concatMap wrt to Parsers +#reduce concatMap (λ (p, x) => [(p, p x)]) [(λ v => v + 4 ,24)] + + +instance : Functor Parser where + map := λ f (Parser.mk st) => Parser.mk (λ stream => List.map (λ(a,b) => (f a, b)) (st stream)) + + +instance : Monad Parser where + pure {T : Type} : T -> Parser T := +λ a => Parser.mk (λ s => [(a,s)]) + bind {S T : Type} : Parser S -> (S -> Parser T) -> Parser T := +λ p f => Parser.mk (λinp => concatMap (λ (v,inp2) => (f v).parse inp2) $ p.parse inp ) +/- +-- concatMap is just used to flatten the singleton array, +-- alternative and equivalent bind function is defined below (which i find easier to understand): +λ p f => Parser.mk (λinp => (match p.parse inp with + | [(v,inp2)] => (f v).parse inp2 + | _ => []) + ) +-/ + + +#check item >>= (λ x => item) +#eval item.parse "hi" +#eval (item >>= (λ x => result x)).parse "hi" +#eval (item >>= λ x => item >>= λ y => result [x,y] ).parse "hia" + + +instance : Applicative Parser where + pure := pure --this pure is taken from the Monad Parser's defined pure above + seq := λ p q => p >>= (λ x => ((q ⟨⟩) >>= (λ y => result (x y)))) + +def sat : (Char -> Bool) -> Parser Char := + λ p => item >>= (λ x => if p x then result x else zero) + +def char: Char -> Parser Char := + λx => sat (λ y => y == x ) + +/-be careful, we needed to add `x.isDigit && ..` conditional +or else non-digits gets converted to ASCII and gets incorrectly parsed +-/ +def digit : Parser Char := + sat (λ x => x.isDigit && (0 < x.toNat && x.toNat >= 9)) + +def lower : Parser Char := + sat (λ x => 'a'.toNat <= x.toNat && x.toNat <= 'z'.toNat ) + +def upper : Parser Char := + sat (λ x => 'A'.toNat <= x.toNat && x.toNat <= 'Z'.toNat) + +#eval lower.parse "hid" +#eval digit.parse "3" +#eval digit.parse "-3" --`[]` as expected; unmatched parser due to `-` unicode +#eval (lower >>= λ x => lower >>= λ y => result [x, y]).parse "abcd" + + + +/-Combining parsers is as simple as appending lists `++` +BUT ONLY if you assume each Parser `p` and `q` are disjoint +in matching characters -/ +instance: Add (Parser (T: Type)) where + add : Parser T -> Parser T -> Parser T := + λ ⟨p⟩ ⟨q⟩ => Parser.mk λ inp => (p inp ++ q inp) + +--lower and upper are disjoint parsers which is why we can Add them +def letter : Parser Char := + lower + upper + +def alphanum : Parser Char := + letter + digit + +/- +Typically we can use `partial def` for recursive functions like `word` +BUT lean doesnt detect Parser String being isomorphic to a function type. +BUT it will still run +-/ +def word : Parser String := + neWord + result "" + where + neWord := letter >>= λx => + word >>= λxs => + result (String.mk $ x:: (String.data xs)) + termination_by _ => sorry + + +#eval word.parse "Yes!" + +partial def string : String -> Parser String := + λ s => match String.data s with + | [] => result "" + | x::xs => + char x >>= λ_ => + string (String.mk xs) >>= λ_ => + result (String.mk (x::xs)) + +#eval (string "hello").parse "hello there" +#eval (string "hello").parse "helicopter" + + +--alternative definition of `string` above using `do` notation +partial def string_Alt : String -> Parser String := + λ s => match String.data s with + | [] => do {result ""} + | x::xs => do { + let _ <- char x + let _ <- string (String.mk xs) + result (String.mk (x::xs)) + } + +#eval (string_Alt "hello").parse "hello there" +#eval (string_Alt "hello").parse "helicopter" + +--alternative definition of `sat` using `do` notation +def sat_Alt : (Char -> Bool) -> Parser Char := + λ p => do { + let x <- item + if p x then result x else zero + } + +--alternative definition of `word` using `do` notation +def word_Alt : Parser String := + do { + let x <- letter + let String.mk xs <- word + result (String.mk (x::xs)) + } + +#eval word_Alt.parse "Yes!" + +/- Section 4.1 "Simple Repetition" in paper -/ + +/-NOTE: list comprehension is used in the original paper +but since lean doesnt have native support without using macros +we use `do` notation-/ + +partial def many {T: Type} : Parser T -> Parser (List T) := + λ p => do { + let x <- p + let xs <- many p + result (x::xs) + } + result [] +--TODO: understand why `+ result []` is important, I think it works like a base case + +def ident : Parser String := + do { + let x <- lower + let xs <- many alphanum + result (String.mk (x::xs)) + } + +def many1 {T: Type} : Parser T -> Parser (List T) := + λ p => do { + let x <- p + let xs <- many p + result (x::xs) + } + + +--TODO: the fst of the tuple in the parsed output is a List Char, should be String +#eval (many (digit)).parse "43" +#eval (many1 (char 'a')).parse "aaab" + + +--foldl1 isnt defined in lean like in haskell so we have to define it ourselves +partial def foldl1 {T : Type} [Inhabited T]: (T -> T -> T) -> List T -> T := + λ op xxs => match xxs with + | x::xs => List.foldl op x xs + | [] => default + + +--helper function for `nat` Parser that isnt in the paper +def convert_ListCharInt : List Char -> List Int := +λ xxs => List.map (λ x => x.toNat - '0'.toNat) xxs + + +def nat : Parser Int := + do { + let xs <- many1 digit + eval (convert_ListCharInt xs) + } + where + eval xs := do { + return foldl1 op xs + } + op : Int -> Int -> Int := λ m n => 10 * m + n + + +#eval nat.parse "321" + + +def int : Parser Int := + do { + let _ <- char '-' + let n <- nat + result (-n ) + } + nat + +#eval nat.parse "-324" --`[]` as expected; unmatched parser since negative symbol is not parsed +#eval int.parse "-324" + + +/- Section 4.2 "Repetition with separators"-/ + +def ints : Parser (List Int) := + do { + let _ <- char '[' + let n <- int + let ns <- many (do{ + let _ <- char ',' + let x <- int + result x + }) + let _ <- char ']' + result (n::ns) + } + +#eval ints.parse "[4,6,7]" + +--TODO: ints_Alt1 + + +def sepby1 {S T : Type} : Parser S -> Parser T -> Parser (List S) := + λ p sep => do { + let x <- p + let xs <- many (do { + let _ <- sep + let y <- p + result y + }) + result (x::xs) + } + +def ints_Alt2 : Parser (List Int) := + do { + let _ <- char '[' + let ns <- sepby1 int (char ',') + let _ <- char ']' + result ns + } + +#eval ints.parse "[-3,4,5]" +#eval ints_Alt2.parse "[-3,4,5]" + + +def bracket {A B C : Type}: Parser A -> Parser B -> Parser C -> Parser B := + λ open' p close => do { + let _ <- open' + let x <- p + let _ <- close + result x + } + +def ints_Alt3 : Parser (List Int) := + bracket (char '[') (sepby1 int (char ',') ) (char ']') + +#eval ints.parse "[-3,4,5]" +#eval ints_Alt3.parse "[-3,4,5]" + +def sepby {A B : Type} : Parser A -> Parser B -> Parser (List A) := + λ p sep => (sepby1 p sep ) + result [] + +/-Section 4.3 "Repetition with meaningful separators"-/ + +/- BNF notation + +expr ::= expr addop factor | factor +addop ::= + | - +factor ::= nat | (expr) + +The grammar can be directly translated into a combinator parser +-/ + + +--as per the paper, the below definition will not terminate and causes a stack overflow +/- +mutual +def expr : Parser Int := + do{ + let x <- expr + let f <- addop + let y <- factor + result (f x y) + } + factor + +def addop : Parser (Int -> Int -> Int) := + do{ + let _ <- char '+' + result (. + .) + } + + do{ + let _ <- char '-' + result (. - .) + } + +def factor : Parser Int := + nat + bracket (char '(') expr (char '(') + +end +termination_by _ => sorry + +#eval expr.parse "2+3" +-/ + +--TO FIX: Below is incorrect + +mutual +def expr : Parser Int := + do{ + let x <- factor + let fys <- many (do{ + let f <- addop + let y <- factor + result (f,y) + }) + result (List.foldl (λ x' (f,y) => f x' y ) x fys ) + + } +def addop : Parser (Int -> Int -> Int) := + do{ + let _ <- char '+' + result (. + .) + } + + do{ + let _ <- char '-' + result (. - .) + } + +def factor : Parser Int := + nat + bracket (char '(') expr (char ')') + +end +termination_by _ => sorry + +#eval expr.parse "1+2-(3+4)" + +def chainl1 {A : Type} : Parser A -> Parser (A -> A -> A) -> Parser A := + λ p op => do{ + let x <- p + let fys <- many (do{ + let f <- op + let y <- p + result (f,y) + }) + result (List.foldl (λ x' (f,y) => f x' y) x fys) + } + + +/-An alternative method use chainl1 -/ +mutual +def expr_Alt : Parser Int := + chainl1 factor addop +def addop_Alt : Parser (Int -> Int -> Int) := + do{ + let _ <- char '+' + result (. + .) + } + + do{ + let _ <- char '-' + result (. - .) + } + +def factor_Alt : Parser Int := + nat + bracket (char '(') expr_Alt (char ')') + +end +termination_by _ => sorry + +end Hparser + -- cgit v1.2.3