summaryrefslogtreecommitdiff
blob: 7d20ce4e10f193ba126704fe6ddc8ef97cddc61a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
diff -urwpN regex-base-0.71/doc/lazy.html ghc-6.6.1/libraries/regex-base/doc/lazy.html
--- regex-base-0.71/doc/lazy.html	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/doc/lazy.html	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1,139 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<HTML>
+  <head>
+    <title>Text.Regex.Lazy</title>
+  </head>
+<body>
+<h1><tt>Text.Regex.Lazy</tt></h1>
+<h2>Version 0.70 (2006-08-10)</h2>
+
+<h3>By Chris Kuklewicz (TextRegexLazy (at) personal (dot) mightyreason (dot) com)</h3>
+
+Changes from 0.66 to 0.70
+<ul>
+  <li> regex-tre added for libtre backend (Text.Regex.TRE), see http://laurikari.net/tre/
+  <li> regex-devel added for tests and benchmarks
+  <li> Text.Regex.*.Wrap APIs improved: the exported wrap* functions
+  never call fail or error under normal circumstances, and use Either
+  types to report errors.  Allocation failures are reported with fail.
+  <li> Text.Regex.*.(ByteString|String) all should export
+  compile/execute/regexec functions which report errors using Either.
+</ul>
+
+Changes from 0.55 to 0.66
+<ul>
+  <li> I broke this into many packages, regex-base for the interface and regex-pcre, regex-posix, regex-parsec, regex-dfa for the four backends and regex-compat to replace Text.Regex(.New)
+  <li> The top level Makefile now can drive setup and installation of all the packages at once.
+</ul>
+
+Changes from 0.44 to 0.55
+<ul>
+  <li> <b>JRegex has been assimilated: PCRE and PosixRE are here</b>.
+  The JRegex-style API rocks, see below and Context.hs and Example.hs
+  <li> Haddock seems to run via ./setup haddock, but the documentation is very thin
+  <li> ./setup test runs TestTestRegexLazy binary if uncommented in cabal file
+  <li> default is now to compile with -Wall -Werror -O2
+  <li> You may need to point the cabal file's "Extra-Lib-Dirs" to point to pcre.
+  <li> You may or may not need a "-lpcre" option to ghc when building
+  projects that depend on Text.Regex.Lazy now.
+</ul>
+
+Changes from 0.33 to 0.44
+<ul>
+  <li> Cabal
+  <li> Compile with -Wall -Werror
+  <li> Change DFAEngineFPS from Data.FastPackedString to Data.ByteString
+</ul>
+See the LICENSE file for details on copyright.  See README for building instructions.
+<br/>
+The new API is very close to JRegex and supports 4 backends:
+<ul>
+  <li> Posix, the standard c regex library
+  <li> PCRE, the <a href="http://www.pcre.org/">Perl Compatible Regular Expressions</a> c library
+  <li> Full, the lazy Parsec based library (see old api below)
+  <li> DFA, the fast lazy matching library (see old api below)
+</ul>
+And for all backends, there are two types that can be used as a source
+of regular expressions or to match a regular expression against:
+String, and ByteString.  The ByteString library will be in the next
+GHC and can be gotten
+from <a
+href="http://www.cse.unsw.edu.au/~dons/fps.html">http://www.cse.unsw.edu.au/~dons/fps.html</a>.
+<p>
+For simplest use of the new API: import Text.Regex.Lazy and one of
+<pre>
+import Text.Regex.PCRE((=~),(=~~))
+import Text.Regex.Parsec((=~),(=~~))
+import Text.Regex.DFA((=~),(=~~))
+import Text.Regex.PosixRE((=~),(=~~))
+import Text.Regex.TRE((=~),(=~~))
+</pre>
+The things you can demand of (=~) and (=~~) are all
+instance defined in Text.Regex.Impl.Context and they are used
+in <tt>Example.hs</tt> as well.
+<p>
+<p>
+You can redefine (=~) and (=~~) to use different options by using makeRegexOpts:
+<pre>
+(=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target
+(=~) x r = let q :: Regex
+               q = makeRegexOpts (some compoption) (some execoption) r
+           in match q x
+
+(=~~) ::(RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) => source1 -> source -> m target
+(=~~) x r = let q :: Regex
+                q = makeRegexOpts (some compoption) (some execoption) r
+            in matchM q x
+</pre>
+There is a medium level API with functions compile/execute/regexec in
+all the Text.Regex.*.(String|ByteString) modules.  These allow for
+errors to be reported as Either types when compiling or running.
+<p>
+The low level APIs are in the Text.Regex.*.Wrap modules.  For the
+c-library backends these expose most of the c-api in wrap* functions
+that make the type more Haskell-like: CString and CStingLen and
+newtypes to specify compile and execute options.  The actual foreign
+calls are not exported; it does not export the raw c api.
+<p>
+Also, Text.Regex.PCRE.Wrap will let you query if it was compiled with
+UTF8 suppor: <tt>configUTF8 :: Bool</tt>.  But I do not provide a way
+to marshall to or from UTF8.  (If you have a UTF8 ByteString then you
+would probably be able to make it work, assuming the indices PCRE uses
+are in bytes, otherwise look at the wrap* functions which are a thin
+layer over the pcreapi).
+<p>
+
+<p>
+The old Text.Regex API is can be replaced. If you need to be drop in
+compatible with <tt>Text.Regex</tt> then you can
+import <tt>Text.Regex.New</tt> and report any infidelities as bugs.
+
+Some advantages of <tt>Text.Regex.Parsec</tt> over <tt>Text.Regex</tt>:
+<ul>
+  <li> It does not marshal to and from c-code arrays, so it is much
+       faster on large input strings.
+  <li> It consumes the input <tt>String</tt> in a mostly lazy manner.
+       This makes streaming from input to output possible.
+  <li> It performs sanity checks so that <tt>subRegex</tt>
+       and <tt>splitRegex</tt> don't loop or go crazy if the pattern
+       matches an empty string -- it will just return the input.
+  <li> If the <tt>String</tt> regex does not parse then you get a nicer error
+       message.
+</ul>
+<p>
+Internally it uses <tt>Parsec</tt> to turn the string regex into
+a <tt>Pattern</tt> data type, simplify the <tt>Pattern</tt>, then
+transform the <tt>Pattern</tt> into a <tt>Parsec</tt> parser that
+accepts matching strings and stores the sub-strings of parenthesized
+groups.
+<p>
+All of this was motivated by the inability to use <tt>Text.Regex</tt>
+to complete
+the <a
+href="http://shootout.alioth.debian.org/gp4/benchmark.php?test=regexdna&lang=all">regex-dna
+benchmark</a> on <a href="http://shootout.alioth.debian.org/">The
+Computer Language Shootout</a>.  The current entry there, by Don
+Stewart and Alson Kemp and Chris Kuklewicz, does not use this Parsec
+solution, but rather a custom DFA lexer from the CTK library.
+</body>
+</HTML>
diff -urwpN regex-base-0.71/doc/README ghc-6.6.1/libraries/regex-base/doc/README
--- regex-base-0.71/doc/README	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/doc/README	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1,39 @@
+README for TestRegexLazy-0.66
+
+By Chris Kuklewicz (TextRegexLazy (at) personal (dot) mightyreason (dot) com)
+
+For more detail on Text.Regex.Lazy look at the very very outdated
+lazy.html file or the LICENSE file.
+
+To build and install:
+  get Data.ByteString from http://www.cse.unsw.edu.au/~dons/fps.html
+  (You probably want to configure ByteString's cabal with -p for profiling)
+  edit list of BACKENDS in Makefile if you want to exclude regex-tre or regex-pcre
+  edit regex-pcre/regex-pcre.cabal to point to your PCRE installation
+  edit CONF and USER variables in Makefile to point to your setup
+  (The CONF includes -p for profiling)
+  run "make all" which will create and install all the packages in $(SUBDIRS)
+
+The packages:
+  regex-base : This hold the type class definitions and (most) RegexContext,Extract instances
+  regex-compat : Builds Text.Regex.New (soon to replace Text.Regex) on top of regex-parsec
+  regex-pcre : Build the PCRE backend, http://www.pcre.org/
+  regex-posix : Builds the Posix backend
+  regex-parsec : Builds my lazy parsec based pure haskell backend
+  regex-dfa : Build the simple backend based on CTKLight (this is LGPL)
+
+There is an additional "regex-devel" package where I am setting up
+testing and bechmarking. Use "make regex-devel" at the top level to
+compile (not install), or use its cabal Setup.hs.
+regex-devel/bench/runbench.sh is my simple toy benchmark.
+
+To use =~ and =~~ new API:
+
+> import Text.Regex.(Parsec|DFA|PCRE|PosixRE|TRE)
+and perhaps
+> import Text.Regex.Base
+
+Look at Example*.hs and instances in Text.Regex.Base.Context.hs for what it can do.
+
+For old "Text.Regex" API drop in compatibility, import Text.Regex.New (uses PosixRE backend)
+
diff -urwpN regex-base-0.71/doc/Redesign.txt ghc-6.6.1/libraries/regex-base/doc/Redesign.txt
--- regex-base-0.71/doc/Redesign.txt	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/doc/Redesign.txt	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1,14 @@
+The regular expression stuff needs some of a rethink.
+
+Things that could be made more efficient, as I think of them:
+
+(1) Making Arrays in Wrap* may be a bit inefficient
+counter: Usage may be like "look up element 3" so random access is good
+
+(2) String DFA: the findRegex computes the prefix string itself, which is sometimes wasted / sometimes wanted / always discarded.  Also, the input string at the start of the match is discarded
+
+(3) Lazy computes MatchedStrings array then discards it.  Wasteful.
+
+(4) Mighty extend RegexLike with ability to return "strings", i.e. Extract instance.  The default conversion could be left in for some things.   Then RegexContext could pull from that instead of matchOnce/matchAll.
+
+(5) make RegexLike default matchAll/matchOnce in terms of matchOnceText and matchAllText
diff -urwpN regex-base-0.71/examples/Example2.hs ghc-6.6.1/libraries/regex-base/examples/Example2.hs
--- regex-base-0.71/examples/Example2.hs	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/examples/Example2.hs	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1,44 @@
+{-# OPTIONS_GHC -fglasgow-exts #-}
+import Text.Regex.Base
+import Text.Regex.Posix(Regex,(=~),(=~~)) -- or DFA or PCRE or PosixRE
+import qualified Data.ByteString.Char8 as B(ByteString,pack)
+
+-- Show mixing of ByteString and String as well as polymorphism:
+
+main = let x :: (RegexContext Regex String target) => target
+           x = ("abaca" =~ B.pack "(.)a")
+           x' :: (RegexContext Regex String target,Monad m) => m target
+           x' = ("abaca" =~~ "(.)a")
+           y :: (RegexContext Regex B.ByteString target) => target
+           y = (B.pack "abaca" =~ "(.)a")
+           y' :: (RegexContext Regex B.ByteString target,Monad m) => m target
+           y' = (B.pack "abaca" =~~ B.pack "(.)a")
+       in do print (x :: Bool)
+             print (x :: Int)
+             print (x :: [MatchArray])
+             print (x' :: Maybe (String,String,String,[String]))
+             print (y :: Bool)
+             print (y :: Int)
+             print (y :: [MatchArray])
+             print (y' :: Maybe (B.ByteString,B.ByteString,B.ByteString,[B.ByteString]))
+
+{- Output is, except for replacing Full with DFA (which has no capture)
+True
+2
+[array (0,1) [(0,(1,2)),(1,(1,1))],array (0,1) [(0,(3,2)),(1,(3,1))]]
+Just ("a","ba","ca",["b"])
+True
+2
+[array (0,1) [(0,(1,2)),(1,(1,1))],array (0,1) [(0,(3,2)),(1,(3,1))]]
+Just ("a","ba","ca",["b"])
+-}
+{- The output for DFA is
+True
+2
+[array (0,0) [(0,(1,2))],array (0,0) [(0,(3,2))]]
+Just ("a","ba","ca",[])
+True
+2
+[array (0,0) [(0,(1,2))],array (0,0) [(0,(3,2))]]
+Just ("a","ba","ca",[])
+-}
diff -urwpN regex-base-0.71/examples/Example3.lhs ghc-6.6.1/libraries/regex-base/examples/Example3.lhs
--- regex-base-0.71/examples/Example3.lhs	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/examples/Example3.lhs	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1,21 @@
+> {-# OPTIONS_GHC -fglasgow-exts #-}
+
+> import Text.Regex.Base
+
+> import qualified Text.Regex.PCRE as R
+> import qualified Text.Regex.PosixRE as S
+> import qualified Text.Regex.Parsec as F
+
+Choose which library to use depending on presence of PCRE library.
+
+> (=~) :: (RegexMaker R.Regex R.CompOption R.ExecOption a,RegexContext R.Regex b t
+>         ,RegexMaker F.Regex F.CompOption F.ExecOption a,RegexContext F.Regex b t
+>         ,RegexMaker S.Regex S.CompOption S.ExecOption a,RegexContext S.Regex b t)
+>      => b -> a -> t
+> (=~) = case R.getVersion of
+>          Just _ -> (R.=~)
+>          Nothing -> case S.getVersion of
+>                       Just _ -> (S.=~)
+>                       Nothing -> (F.=~)
+
+> main = print ("abc" =~ "(.)c" :: Bool)
\ No newline at end of file
diff -urwpN regex-base-0.71/examples/Example.hs ghc-6.6.1/libraries/regex-base/examples/Example.hs
--- regex-base-0.71/examples/Example.hs	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/examples/Example.hs	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1,14 @@
+{-# OPTIONS_GHC -fglasgow-exts #-}
+import Text.Regex.Base
+import Text.Regex.Posix((=~),(=~~)) -- or DFA or PCRE or PosixRE
+import qualified Data.ByteString.Char8 as B(ByteString,pack)
+
+main = let b :: Bool
+           b = ("abaca" =~ "(.)a")
+           c :: [MatchArray]
+           c = ("abaca" =~ "(.)a")
+           d :: Maybe (String,String,String,[String])
+           d = ("abaca" =~~ "(.)a")
+       in do print b
+             print c
+             print d
diff -urwpN regex-base-0.71/Makefile ghc-6.6.1/libraries/regex-base/Makefile
--- regex-base-0.71/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/Makefile	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1,20 @@
+TOP=..
+include $(TOP)/mk/boilerplate.mk
+
+SUBDIRS = 
+
+ALL_DIRS = \
+	Text/Regex \
+	Text/Regex/Base
+
+PACKAGE 	= regex-base
+VERSION 	= 0.72
+PACKAGE_DEPS 	= base
+
+EXCLUDED_SRCS	= Setup.hs
+
+SRC_HC_OPTS += -cpp 
+
+SRC_HADDOCK_OPTS += -t "Haskell Hierarchical Libraries ($(PACKAGE) package)"
+
+include $(TOP)/mk/target.mk
diff -urwpN regex-base-0.71/package.conf.in ghc-6.6.1/libraries/regex-base/package.conf.in
--- regex-base-0.71/package.conf.in	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/package.conf.in	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1,27 @@
+name:		PACKAGE
+version:	VERSION
+license:	BSD3
+maintainer:	TextRegexLazy@personal.mightyreason.com
+exposed:	True
+
+exposed-modules:        Text.Regex.Base
+                        Text.Regex.Base.RegexLike
+                        Text.Regex.Base.Context
+                        Text.Regex.Base.Impl
+
+hidden-modules:
+
+import-dirs:	IMPORT_DIR
+library-dirs:	LIB_DIR
+hs-libraries:   "HSregex-base"
+extra-libraries:
+include-dirs:
+includes:		
+depends:	 base
+hugs-options:
+cc-options:
+ld-options:
+framework-dirs:
+frameworks:
+haddock-interfaces:	HADDOCK_IFACE
+haddock-html:		HTML_DIR
diff -urwpN regex-base-0.71/prologue.txt ghc-6.6.1/libraries/regex-base/prologue.txt
--- regex-base-0.71/prologue.txt	1970-01-01 01:00:00.000000000 +0100
+++ ghc-6.6.1/libraries/regex-base/prologue.txt	2007-04-25 18:24:10.000000000 +0100
@@ -0,0 +1 @@
+Interfaces for regular expressions
diff -urwpN regex-base-0.71/regex-base.cabal ghc-6.6.1/libraries/regex-base/regex-base.cabal
--- regex-base-0.71/regex-base.cabal	2006-12-05 18:29:02.000000000 +0000
+++ ghc-6.6.1/libraries/regex-base/regex-base.cabal	2007-04-25 18:24:10.000000000 +0100
@@ -2,7 +2,7 @@
 -- To fix for cabal < 1.1.4 comment out the Extra-Source-Files line
 -- ****************************************************************
 Name:                   regex-base
-Version:                0.71
+Version:                0.72
 -- Cabal-Version:       >=1.1.4
 License:                BSD3
 License-File:           LICENSE
@@ -28,7 +28,7 @@ Buildable:              True
 -- Other-Modules:
 -- ********* Be backward compatible until 6.4.2 is futher deployed
 -- HS-Source-Dirs:         "."
-Extensions:             MultiParamTypeClasses, FunctionalDependencies
+Extensions:             MultiParamTypeClasses, FunctionalDependencies, CPP
 -- GHC-Options:            -Wall -Werror
 GHC-Options:            -Wall -Werror -O2
 -- GHC-Options:            -Wall -ddump-minimal-imports
diff -urwpN regex-base-0.71/Text/Regex/Base/Context.hs ghc-6.6.1/libraries/regex-base/Text/Regex/Base/Context.hs
--- regex-base-0.71/Text/Regex/Base/Context.hs	2006-12-05 18:29:02.000000000 +0000
+++ ghc-6.6.1/libraries/regex-base/Text/Regex/Base/Context.hs	2007-04-25 18:24:10.000000000 +0100
@@ -185,9 +185,12 @@ instance (RegexLike a b) => RegexContext
   match r s = maybe (-1,0) (!0) (matchOnce r s)
   matchM r s = maybe regexFailed (return.(!0)) (matchOnce r s)
 
+#if __GLASGOW_HASKELL__
+-- overlaps with instance (RegexLike a b) => RegexContext a b (Array Int b)
 instance (RegexLike a b) => RegexContext a b MatchArray where 
   match r s = maybe nullArray id (matchOnce r s)
   matchM r s = maybe regexFailed return (matchOnce r s)
+#endif
 
 instance (RegexLike a b) => RegexContext a b (b,MatchText b,b) where 
   match r s = maybe (s,nullArray,empty) id (matchOnceText r s)
@@ -216,21 +219,27 @@ instance (RegexLike a b) => RegexContext
            , mrMatch = whole
            , mrAfter = post
            , mrSubs = fmap fst ma
-           , mrSubList = tail (map fst subs) })
+           , mrSubList = map fst subs })
 
 -- ** Instances based on matchAll,matchAllText
 
+#if __GLASGOW_HASKELL__
+-- overlaps with instance (RegexLike a b) => RegexContext a b [Array Int b]
 instance (RegexLike a b) => RegexContext a b [MatchArray] where
   match = matchAll
   matchM = nullFail
+#endif
 
 instance (RegexLike a b) => RegexContext a b [MatchText b] where
   match = matchAllText
   matchM = nullFail
            
+#if __GLASGOW_HASKELL__
+-- overlaps with instance (RegexLike a b) => RegexContext a b [b]
 instance (RegexLike a b) => RegexContext a b [(MatchOffset,MatchLength)] where
   match r s = [ ma!0 | ma <- matchAll r s ]
   matchM = nullFail
+#endif
 
 instance (RegexLike a b) => RegexContext a b [b] where
   match r s = [ fst (ma!0) | ma <- matchAllText r s ]