pushHEAD master

author: itamar <itbaf@proton.me> 2026-03-09 22:32:22 +0200
committer: itamar <itbaf@proton.me> 2026-03-09 22:32:22 +0200
commit: 8b229c97193ccb24c46b55ac3869868884d3bd39 (patch)
tree: 7f2d29441f82b1aa46ac975bbbaa1c3851ef5056 /file.lean
1 files changed, 406 insertions, 0 deletions
diff --git a/file.lean b/file.lean
new file mode 100644
index 0000000..214d8d1
--- /dev/null
+++ b/file.lean
@@ -0,0 +1,406 @@
+namespace Hparser
+
+structure Parser (T: Type) : Type  where
+  parse : (String -> List (T ×  String))
+
+open Parser
+
+#check Parser.mk (λ x => [('a',x)])
+
+
+/- 3 Parser primitives: result, zero, item -/
+
+def result {T : Type } : T -> Parser T :=
+  λ v => Parser.mk $ λ inp => [(v, inp)]
+
+def zero {T: Type } : Parser T :=
+  Parser.mk $ λ inp => []
+
+def item : Parser Char :=
+  Parser.mk λ x =>
+    match String.data x with
+    | [] => []
+    | x::xs => [(x,String.mk xs)]
+
+#eval (result 'f').parse "hfaiweh"
+#eval item.parse "awefawe"
+
+
+def concatMap {S T: Type} : (S -> List T) -> List S -> List T :=
+λ f s =>
+  match s with
+  | [] => []
+  | x::xs => (f x)++(concatMap f xs)
+
+#eval String.append "d"  "da"
+
+--demo of how concatMap wrt to Parsers
+#reduce concatMap (λ (p, x) => [(p, p x)]) [(λ v => v + 4 ,24)]
+
+
+instance : Functor Parser where
+  map := λ f (Parser.mk st) => Parser.mk (λ stream => List.map (λ(a,b) => (f a, b)) (st stream))
+
+
+instance : Monad Parser where
+  pure {T : Type} : T -> Parser T  :=
+λ a => Parser.mk (λ s => [(a,s)])
+  bind {S T : Type} : Parser S -> (S -> Parser T) -> Parser T :=
+λ p f => Parser.mk (λinp => concatMap (λ (v,inp2) => (f v).parse inp2) $ p.parse inp )
+/-
+-- concatMap is just used to flatten the singleton array,
+-- alternative and equivalent bind function is defined below (which i find easier to understand):
+λ p f => Parser.mk (λinp =>  (match p.parse inp with
+                                      | [(v,inp2)] => (f v).parse inp2
+                                      | _ => [])
+                                      )
+-/
+
+
+#check item >>= (λ x => item)
+#eval item.parse "hi"
+#eval (item >>= (λ x => result x)).parse "hi"
+#eval (item >>= λ x => item >>= λ y => result [x,y]  ).parse "hia"
+
+
+instance :  Applicative Parser where
+   pure := pure --this pure is taken from the Monad Parser's defined pure above
+   seq := λ p q => p >>= (λ x => ((q ⟨⟩) >>= (λ y => result (x y))))
+
+def sat : (Char -> Bool) -> Parser Char :=
+  λ p => item >>= (λ x => if p x then result x else zero)
+
+def char: Char -> Parser Char :=
+  λx => sat (λ y => y == x )
+
+/-be careful, we needed to add `x.isDigit && ..` conditional
+or else non-digits gets converted to ASCII and gets incorrectly parsed
+-/
+def digit : Parser Char :=
+  sat (λ x => x.isDigit && (0 < x.toNat && x.toNat >= 9))
+
+def lower : Parser Char :=
+  sat (λ x => 'a'.toNat <= x.toNat && x.toNat <= 'z'.toNat )
+
+def upper : Parser Char :=
+  sat (λ x => 'A'.toNat <= x.toNat && x.toNat <= 'Z'.toNat)
+
+#eval lower.parse "hid"
+#eval digit.parse "3"
+#eval digit.parse "-3" --`[]` as expected; unmatched parser due to `-` unicode
+#eval (lower >>= λ x => lower >>= λ y => result [x, y]).parse "abcd"
+
+
+
+/-Combining parsers is as simple as appending lists `++`
+BUT ONLY if you assume each Parser `p` and `q` are disjoint
+in matching characters -/
+instance: Add (Parser (T: Type)) where
+  add : Parser T -> Parser T -> Parser T :=
+    λ ⟨p⟩ ⟨q⟩  => Parser.mk λ inp => (p inp ++ q inp)
+
+--lower and upper are disjoint parsers which is why we can Add them
+def letter : Parser Char :=
+  lower + upper
+
+def alphanum : Parser Char :=
+  letter + digit
+
+/-
+Typically we can use `partial def` for recursive functions like `word`
+BUT lean doesnt detect Parser String being isomorphic to a function type.
+BUT it will still run
+-/
+def word : Parser String :=
+  neWord  + result ""
+    where
+    neWord := letter >>= λx =>
+              word >>= λxs =>
+              result (String.mk $ x:: (String.data xs))
+    termination_by _ => sorry
+
+
+#eval word.parse "Yes!"
+
+partial def string : String -> Parser String :=
+  λ s => match String.data s with
+    | [] => result ""
+    | x::xs   =>
+               char x >>= λ_ =>
+               string (String.mk xs) >>= λ_ =>
+               result (String.mk (x::xs))
+
+#eval (string "hello").parse "hello there"
+#eval (string "hello").parse "helicopter"
+
+
+--alternative definition of `string` above using `do` notation
+partial def string_Alt : String -> Parser String :=
+  λ s => match String.data s with
+    | [] => do {result ""}
+    | x::xs => do {
+                  let _ <- char x
+                  let _ <- string (String.mk xs)
+                  result (String.mk (x::xs))
+          }
+
+#eval (string_Alt "hello").parse "hello there"
+#eval (string_Alt "hello").parse "helicopter"
+
+--alternative definition of `sat` using `do` notation
+def sat_Alt : (Char -> Bool) -> Parser Char :=
+  λ p => do {
+            let x <- item
+            if p x then result x else zero
+  }
+
+--alternative definition of `word` using `do` notation
+def word_Alt : Parser String :=
+  do {
+      let x <- letter
+      let String.mk xs <- word
+      result (String.mk (x::xs))
+  }
+
+#eval word_Alt.parse "Yes!"
+
+/- Section 4.1 "Simple Repetition" in paper -/
+
+/-NOTE: list comprehension is used in the original paper
+but since lean doesnt have native support without using macros
+we use `do` notation-/
+
+partial def many {T: Type} : Parser T -> Parser (List T) :=
+  λ p => do {
+            let x <- p
+            let xs <- many p
+            result (x::xs)
+  } + result []
+--TODO: understand why `+ result []` is important, I think it works like a base case
+
+def ident : Parser String :=
+  do {
+    let x <- lower
+    let xs <- many alphanum
+    result (String.mk (x::xs))
+  }
+
+def many1 {T: Type} : Parser T -> Parser (List T) :=
+  λ p => do {
+      let x <- p
+      let xs <- many p
+      result (x::xs)
+  }
+
+
+--TODO: the fst of the tuple in the parsed output is a List Char, should be String
+#eval (many (digit)).parse "43"
+#eval (many1 (char 'a')).parse "aaab"
+
+
+--foldl1 isnt defined in lean like in haskell so we have to define it ourselves
+partial def foldl1 {T : Type} [Inhabited T]: (T -> T -> T) -> List T -> T :=
+  λ op xxs => match xxs with
+    | x::xs => List.foldl op x xs
+    | [] => default
+
+
+--helper function for `nat` Parser that isnt in the paper
+def convert_ListCharInt : List Char -> List Int :=
+λ xxs => List.map (λ x => x.toNat - '0'.toNat) xxs
+
+
+def nat : Parser Int :=
+  do {
+      let xs <- many1 digit
+      eval (convert_ListCharInt xs)
+  }
+    where
+      eval xs := do {
+                      return  foldl1 op xs
+                    }
+      op : Int -> Int -> Int := λ m n => 10 * m + n
+
+
+#eval nat.parse "321"
+
+
+def int : Parser Int :=
+  do {
+      let _ <- char '-'
+      let n <- nat
+      result (-n )
+  } + nat
+
+#eval nat.parse "-324"  --`[]` as expected; unmatched parser since negative symbol is not parsed
+#eval int.parse "-324"
+
+
+/- Section 4.2 "Repetition with separators"-/
+
+def ints : Parser (List Int) :=
+  do {
+      let _ <- char '['
+      let n <- int
+      let ns <- many (do{
+        let _ <- char ','
+        let x <- int
+        result x
+      })
+      let _ <- char ']'
+      result (n::ns)
+  }
+
+#eval ints.parse "[4,6,7]"
+
+--TODO: ints_Alt1
+
+
+def sepby1 {S T : Type} : Parser S -> Parser T -> Parser (List S) :=
+  λ p sep => do {
+                  let x <- p
+                  let xs <- many (do {
+                    let _ <- sep
+                    let y <- p
+                    result y
+                  })
+                  result (x::xs)
+                }
+
+def ints_Alt2 : Parser (List Int) :=
+  do {
+      let _ <- char '['
+      let ns <- sepby1 int (char ',')
+      let _ <- char ']'
+      result ns
+  }
+
+#eval ints.parse "[-3,4,5]"
+#eval ints_Alt2.parse "[-3,4,5]"
+
+
+def bracket {A B C : Type}: Parser A -> Parser B -> Parser C -> Parser B :=
+  λ open' p close => do {
+                        let _ <- open'
+                        let x <- p
+                        let _ <- close
+                        result x
+                      }
+
+def ints_Alt3 : Parser (List Int) :=
+  bracket (char '[') (sepby1 int (char ',') ) (char ']')
+
+#eval ints.parse "[-3,4,5]"
+#eval ints_Alt3.parse "[-3,4,5]"
+
+def sepby {A B : Type} : Parser A -> Parser B -> Parser (List A) :=
+  λ p sep => (sepby1 p sep ) + result []
+
+/-Section 4.3 "Repetition with meaningful separators"-/
+
+/- BNF notation
+
+expr    ::=   expr addop factor  |  factor
+addop   ::=   +  |  -
+factor  ::=   nat  |  (expr)
+
+The grammar can be directly translated into a combinator parser
+-/
+
+
+--as per the paper, the below definition will not terminate and causes a stack overflow
+/-
+mutual
+def expr : Parser Int :=
+  do{
+      let x <- expr
+      let f <- addop
+      let y <- factor
+      result (f x y)
+  } + factor
+
+def addop : Parser (Int -> Int -> Int) :=
+  do{
+    let _ <- char '+'
+    result (. + .)
+  } +
+  do{
+    let _ <- char '-'
+    result (. - .)
+  }
+
+def factor : Parser Int :=
+  nat + bracket (char '(') expr (char '(')
+
+end
+termination_by _ => sorry
+
+#eval expr.parse "2+3"
+-/
+
+--TO FIX: Below is incorrect
+
+mutual
+def expr : Parser Int :=
+  do{
+    let x <- factor
+    let fys <- many (do{
+                        let f <- addop
+                        let y <- factor
+                        result (f,y)
+                    })
+    result (List.foldl (λ x' (f,y) => f x' y ) x fys )
+
+  }
+def addop : Parser (Int -> Int -> Int) :=
+  do{
+    let _ <- char '+'
+    result (. + .)
+  } +
+  do{
+    let _ <- char '-'
+    result (. - .)
+  }
+
+def factor : Parser Int :=
+  nat + bracket (char '(') expr (char ')')
+
+end
+termination_by _ => sorry
+
+#eval expr.parse "1+2-(3+4)"
+
+def chainl1 {A : Type} : Parser A -> Parser (A -> A -> A) -> Parser A :=
+  λ p op => do{
+              let x <- p
+              let fys <- many (do{
+                                let f <- op
+                                let y <- p
+                                result (f,y)
+                              })
+              result (List.foldl (λ x' (f,y) => f x' y) x fys)
+  }
+
+
+/-An alternative method use chainl1 -/
+mutual
+def expr_Alt : Parser Int :=
+  chainl1 factor addop
+def addop_Alt : Parser (Int -> Int -> Int) :=
+  do{
+    let _ <- char '+'
+    result (. + .)
+  } +
+  do{
+    let _ <- char '-'
+    result (. - .)
+  }
+
+def factor_Alt : Parser Int :=
+  nat + bracket (char '(') expr_Alt (char ')')
+
+end
+termination_by _ => sorry
+
+end Hparser
+
author	itamar <itbaf@proton.me>	2026-03-09 22:32:22 +0200
committer	itamar <itbaf@proton.me>	2026-03-09 22:32:22 +0200
commit	8b229c97193ccb24c46b55ac3869868884d3bd39 (patch)
tree	7f2d29441f82b1aa46ac975bbbaa1c3851ef5056 /file.lean