summaryrefslogtreecommitdiff
path: root/file.lean
blob: 214d8d1fbe1441abbd28596f2045efa8e716a66c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
namespace Hparser

structure Parser (T: Type) : Type  where
  parse : (String -> List (T ×  String))

open Parser

#check Parser.mk (λ x => [('a',x)])


/- 3 Parser primitives: result, zero, item -/

def result {T : Type } : T -> Parser T :=
  λ v => Parser.mk $ λ inp => [(v, inp)]

def zero {T: Type } : Parser T :=
  Parser.mk $ λ inp => []

def item : Parser Char :=
  Parser.mk λ x =>
    match String.data x with
    | [] => []
    | x::xs => [(x,String.mk xs)]

#eval (result 'f').parse "hfaiweh"
#eval item.parse "awefawe"


def concatMap {S T: Type} : (S -> List T) -> List S -> List T :=
λ f s =>
  match s with
  | [] => []
  | x::xs => (f x)++(concatMap f xs)

#eval String.append "d"  "da"

--demo of how concatMap wrt to Parsers
#reduce concatMap (λ (p, x) => [(p, p x)]) [(λ v => v + 4 ,24)]


instance : Functor Parser where
  map := λ f (Parser.mk st) => Parser.mk (λ stream => List.map (λ(a,b) => (f a, b)) (st stream))


instance : Monad Parser where
  pure {T : Type} : T -> Parser T  :=
λ a => Parser.mk (λ s => [(a,s)])
  bind {S T : Type} : Parser S -> (S -> Parser T) -> Parser T :=
λ p f => Parser.mk (λinp => concatMap (λ (v,inp2) => (f v).parse inp2) $ p.parse inp )
/-
-- concatMap is just used to flatten the singleton array,
-- alternative and equivalent bind function is defined below (which i find easier to understand):
λ p f => Parser.mk (λinp =>  (match p.parse inp with
                                      | [(v,inp2)] => (f v).parse inp2
                                      | _ => [])
                                      )
-/


#check item >>= (λ x => item)
#eval item.parse "hi"
#eval (item >>= (λ x => result x)).parse "hi"
#eval (item >>= λ x => item >>= λ y => result [x,y]  ).parse "hia"


instance :  Applicative Parser where
   pure := pure --this pure is taken from the Monad Parser's defined pure above
   seq := λ p q => p >>= (λ x => ((q ⟨⟩) >>= (λ y => result (x y))))

def sat : (Char -> Bool) -> Parser Char :=
  λ p => item >>= (λ x => if p x then result x else zero)

def char: Char -> Parser Char :=
  λx => sat (λ y => y == x )

/-be careful, we needed to add `x.isDigit && ..` conditional
or else non-digits gets converted to ASCII and gets incorrectly parsed
-/
def digit : Parser Char :=
  sat (λ x => x.isDigit && (0 < x.toNat && x.toNat >= 9))

def lower : Parser Char :=
  sat (λ x => 'a'.toNat <= x.toNat && x.toNat <= 'z'.toNat )

def upper : Parser Char :=
  sat (λ x => 'A'.toNat <= x.toNat && x.toNat <= 'Z'.toNat)

#eval lower.parse "hid"
#eval digit.parse "3"
#eval digit.parse "-3" --`[]` as expected; unmatched parser due to `-` unicode
#eval (lower >>= λ x => lower >>= λ y => result [x, y]).parse "abcd"



/-Combining parsers is as simple as appending lists `++`
BUT ONLY if you assume each Parser `p` and `q` are disjoint
in matching characters -/
instance: Add (Parser (T: Type)) where
  add : Parser T -> Parser T -> Parser T :=
    λ ⟨p⟩ ⟨q⟩  => Parser.mk λ inp => (p inp ++ q inp)

--lower and upper are disjoint parsers which is why we can Add them
def letter : Parser Char :=
  lower + upper

def alphanum : Parser Char :=
  letter + digit

/-
Typically we can use `partial def` for recursive functions like `word`
BUT lean doesnt detect Parser String being isomorphic to a function type.
BUT it will still run
-/
def word : Parser String :=
  neWord  + result ""
    where
    neWord := letter >>= λx =>
              word >>= λxs =>
              result (String.mk $ x:: (String.data xs))
    termination_by _ => sorry


#eval word.parse "Yes!"

partial def string : String -> Parser String :=
  λ s => match String.data s with
    | [] => result ""
    | x::xs   =>
               char x >>= λ_ =>
               string (String.mk xs) >>= λ_ =>
               result (String.mk (x::xs))

#eval (string "hello").parse "hello there"
#eval (string "hello").parse "helicopter"


--alternative definition of `string` above using `do` notation
partial def string_Alt : String -> Parser String :=
  λ s => match String.data s with
    | [] => do {result ""}
    | x::xs => do {
                  let _ <- char x
                  let _ <- string (String.mk xs)
                  result (String.mk (x::xs))
          }

#eval (string_Alt "hello").parse "hello there"
#eval (string_Alt "hello").parse "helicopter"

--alternative definition of `sat` using `do` notation
def sat_Alt : (Char -> Bool) -> Parser Char :=
  λ p => do {
            let x <- item
            if p x then result x else zero
  }

--alternative definition of `word` using `do` notation
def word_Alt : Parser String :=
  do {
      let x <- letter
      let String.mk xs <- word
      result (String.mk (x::xs))
  }

#eval word_Alt.parse "Yes!"

/- Section 4.1 "Simple Repetition" in paper -/

/-NOTE: list comprehension is used in the original paper
but since lean doesnt have native support without using macros
we use `do` notation-/

partial def many {T: Type} : Parser T -> Parser (List T) :=
  λ p => do {
            let x <- p
            let xs <- many p
            result (x::xs)
  } + result []
--TODO: understand why `+ result []` is important, I think it works like a base case

def ident : Parser String :=
  do {
    let x <- lower
    let xs <- many alphanum
    result (String.mk (x::xs))
  }

def many1 {T: Type} : Parser T -> Parser (List T) :=
  λ p => do {
      let x <- p
      let xs <- many p
      result (x::xs)
  }


--TODO: the fst of the tuple in the parsed output is a List Char, should be String
#eval (many (digit)).parse "43"
#eval (many1 (char 'a')).parse "aaab"


--foldl1 isnt defined in lean like in haskell so we have to define it ourselves
partial def foldl1 {T : Type} [Inhabited T]: (T -> T -> T) -> List T -> T :=
  λ op xxs => match xxs with
    | x::xs => List.foldl op x xs
    | [] => default


--helper function for `nat` Parser that isnt in the paper
def convert_ListCharInt : List Char -> List Int :=
λ xxs => List.map (λ x => x.toNat - '0'.toNat) xxs


def nat : Parser Int :=
  do {
      let xs <- many1 digit
      eval (convert_ListCharInt xs)
  }
    where
      eval xs := do {
                      return  foldl1 op xs
                    }
      op : Int -> Int -> Int := λ m n => 10 * m + n


#eval nat.parse "321"


def int : Parser Int :=
  do {
      let _ <- char '-'
      let n <- nat
      result (-n )
  } + nat

#eval nat.parse "-324"  --`[]` as expected; unmatched parser since negative symbol is not parsed
#eval int.parse "-324"


/- Section 4.2 "Repetition with separators"-/

def ints : Parser (List Int) :=
  do {
      let _ <- char '['
      let n <- int
      let ns <- many (do{
        let _ <- char ','
        let x <- int
        result x
      })
      let _ <- char ']'
      result (n::ns)
  }

#eval ints.parse "[4,6,7]"

--TODO: ints_Alt1


def sepby1 {S T : Type} : Parser S -> Parser T -> Parser (List S) :=
  λ p sep => do {
                  let x <- p
                  let xs <- many (do {
                    let _ <- sep
                    let y <- p
                    result y
                  })
                  result (x::xs)
                }

def ints_Alt2 : Parser (List Int) :=
  do {
      let _ <- char '['
      let ns <- sepby1 int (char ',')
      let _ <- char ']'
      result ns
  }

#eval ints.parse "[-3,4,5]"
#eval ints_Alt2.parse "[-3,4,5]"


def bracket {A B C : Type}: Parser A -> Parser B -> Parser C -> Parser B :=
  λ open' p close => do {
                        let _ <- open'
                        let x <- p
                        let _ <- close
                        result x
                      }

def ints_Alt3 : Parser (List Int) :=
  bracket (char '[') (sepby1 int (char ',') ) (char ']')

#eval ints.parse "[-3,4,5]"
#eval ints_Alt3.parse "[-3,4,5]"

def sepby {A B : Type} : Parser A -> Parser B -> Parser (List A) :=
  λ p sep => (sepby1 p sep ) + result []

/-Section 4.3 "Repetition with meaningful separators"-/

/- BNF notation

expr    ::=   expr addop factor  |  factor
addop   ::=   +  |  -
factor  ::=   nat  |  (expr)

The grammar can be directly translated into a combinator parser
-/


--as per the paper, the below definition will not terminate and causes a stack overflow
/-
mutual
def expr : Parser Int :=
  do{
      let x <- expr
      let f <- addop
      let y <- factor
      result (f x y)
  } + factor

def addop : Parser (Int -> Int -> Int) :=
  do{
    let _ <- char '+'
    result (. + .)
  } +
  do{
    let _ <- char '-'
    result (. - .)
  }

def factor : Parser Int :=
  nat + bracket (char '(') expr (char '(')

end
termination_by _ => sorry

#eval expr.parse "2+3"
-/

--TO FIX: Below is incorrect

mutual
def expr : Parser Int :=
  do{
    let x <- factor
    let fys <- many (do{
                        let f <- addop
                        let y <- factor
                        result (f,y)
                    })
    result (List.foldl (λ x' (f,y) => f x' y ) x fys )

  }
def addop : Parser (Int -> Int -> Int) :=
  do{
    let _ <- char '+'
    result (. + .)
  } +
  do{
    let _ <- char '-'
    result (. - .)
  }

def factor : Parser Int :=
  nat + bracket (char '(') expr (char ')')

end
termination_by _ => sorry

#eval expr.parse "1+2-(3+4)"

def chainl1 {A : Type} : Parser A -> Parser (A -> A -> A) -> Parser A :=
  λ p op => do{
              let x <- p
              let fys <- many (do{
                                let f <- op
                                let y <- p
                                result (f,y)
                              })
              result (List.foldl (λ x' (f,y) => f x' y) x fys)
  }


/-An alternative method use chainl1 -/
mutual
def expr_Alt : Parser Int :=
  chainl1 factor addop
def addop_Alt : Parser (Int -> Int -> Int) :=
  do{
    let _ <- char '+'
    result (. + .)
  } +
  do{
    let _ <- char '-'
    result (. - .)
  }

def factor_Alt : Parser Int :=
  nat + bracket (char '(') expr_Alt (char ')')

end
termination_by _ => sorry

end Hparser