struct
(** The result of matching a regular expression against a string may be:
    - [None], which means that the matching has failed;
    - [Some (a, x, gl, b)], which means that a substring [x] of the input
      string matched the regular expression, that [a] and [b] (the frame)
      are the indexes of the first and of the last character of [x] in the
      input string, and that [gl] is the list of substrings captured by the
      groups of the regular expression. *)
type result = (int * string * string list * int) option
;;
(** {2 Building} *)
(** Facility for building regular expressions.
    The call [mkregexp pl gl sl] causes the following actions:
    - the strings of [pl] are combined with [String.big (^)] into a prefix;
    - each string of [gl] is enclosed between [\(] and [\)], i.e. turned
      into a group, and the results are combined in the same way;
    - the strings of [sl] are combined in the same way into a suffix;
    - prefix, groups and suffix are concatenated in this order and, when
      [strict] is [true] (the default), the expression is anchored with a
      leading [^] and a trailing [$].

    The resulting expression is compiled with [Str.regexp].

    NOTE(review): [String.big] is not defined in this file; it is presumably
    a fold joining the list with the given operator -- confirm. *)
let mkregexp ?(strict:bool=true) pl gl sl : Str.regexp =
  let as_group g = "\\(" ^ g ^ "\\)" in
  let head = String.big (^) pl in
  let body = String.big (^) (List.map as_group gl) in
  let tail = String.big (^) sl in
  let unanchored = head ^ body ^ tail in
  Str.regexp (if strict then "^" ^ unanchored ^ "$" else unanchored)
;;
(** {2 Matching} *)
(** The call [matched_groups i x] returns the list of substrings of [x]
    matching groups, starting from the group number [i].
    The enumeration stops at the first group for which [Str.matched_group]
    fails, so the list may be shorter than the number of groups of the
    regular expression.
    See the standard [Str.matched_group] for more details. *)
let rec matched_groups i x : (string list) =
  (* Only the failures documented for Str.matched_group end the enumeration
     (group not matched, or no such group); any other exception is left to
     propagate instead of being silently turned into an empty list. *)
  let current =
    try Some (Str.matched_group i x)
    with Not_found | Invalid_argument _ -> None
  in
  match current with
  | None -> []
  | Some g -> g :: matched_groups (i + 1) x
;;
(** The heuristic [match_frame r s (a,b)] tries to match the substring of
    [s] going from index [a] to index [b] (both included) with the compiled
    regular expression [r].
    The search runs on the extracted substring, and the positions in the
    result are shifted by [a] so that they refer to the whole string [s].
    Returns [None] when nothing in the frame matches.
    @raise Invalid_argument if [(a,b)] does not designate a valid substring
    of [s] (the exception comes from [String.sub] and is not caught). *)
let match_frame (r:Str.regexp) (s:string) (a,b) : result =
  try
    let frame = String.sub s a (b - a + 1) in
    let first = Str.search_forward r frame 0 in
    let matched = Str.matched_string frame in
    (* Str.match_end is the position just after the match. *)
    let last = Str.match_end () - 1 in
    let groups = matched_groups 1 frame in
    Some (a + first, matched, groups, a + last)
  with Not_found -> None
;;
(** The heuristic [match_whole r s] tries to match the whole string [s]
    with the compiled regular expression [r]: [s] is searched forward from
    its beginning and the first matching substring is reported, together
    with its first and last indexes and the substrings captured by the
    groups. Returns [None] when no substring of [s] matches. *)
let match_whole (r:Str.regexp) (s:string) : result =
  try
    let first = Str.search_forward r s 0 in
    let matched = Str.matched_string s in
    (* Str.match_end is the position just after the match. *)
    let last = Str.match_end () - 1 in
    let groups = matched_groups 1 s in
    Some (first, matched, groups, last)
  with Not_found -> None
;;
(** Similar to [match_whole] but the regular expression is given as a simple
    string and compiled on the fly before invoking [match_whole]. In other
    words, [match_string e s] is simply a shortcut for
    [match_whole (Str.regexp e) s]. *)
let match_string (expr:string) (s:string) : result =
  let compiled = Str.regexp expr in
  match_whole compiled s
;;
(** Extract parts of a string using a regexp containing some group
    expressions [\(..\)]. The result is the list of captured substrings
    reported by [match_whole].

    Example:
    {[# extract_groups (Str.regexp "\\([0-9]+\\)-\\([a-z]+\\)") "12-ab" ;;
  : string list = ["12"; "ab"] ]}

    @raise Not_found if the input string does not match (an earlier version
    returned the empty list instead). *)
let extract_groups (r:Str.regexp) (s:string) : string list =
  match match_whole r s with
  | Some (_, _, groups, _) -> groups
  | None ->
    (* To do: It was "[]". This may break things, and should be considered
       experimental --L. *)
    raise Not_found
;;
(** {2 Boolean versions} *)
(** Boolean versions of matching heuristics ([true] stands for [<>None]). *)
module Bool = struct

  (** Boolean version of the heuristic [match_frame]. *)
  let match_frame (r:Str.regexp) (s:string) (a,b) : bool =
    (* The binding is not recursive: this is the enclosing [match_frame]. *)
    match match_frame r s (a,b) with
    | None -> false
    | Some _ -> true

  (** Boolean version of the heuristic [match_whole]. *)
  let match_whole (r:Str.regexp) (s:string) : bool =
    match match_whole r s with
    | None -> false
    | Some _ -> true

  (** Boolean version of the heuristic [match_string]. *)
  (* Defined with [and] on purpose: both bodies must refer to the enclosing,
     option-returning [match_whole], not to the boolean one defined above. *)
  and match_string (e:string) (s:string) : bool =
    match match_whole (Str.regexp e) s with
    | None -> false
    | Some _ -> true

end;; (* module Bool *)
(** {2 Stuff} *)
(** [minus x y] deletes the rightmost occurrence of the pattern [y] from the
    string [x]. When [y] does not occur in [x], the string [x] is returned
    unchanged. Note that [y] is inserted as is into a regular expression,
    so it is interpreted as a pattern and not as a literal string.

    Examples:
    {[# minus "abcbd" "b" ;;
  : string = "abcd"
# minus "abcd" "z" ;;
  : string = "abcd" ]}

    NOTE(review): the two captured parts are rejoined with [String.big (^)],
    which is not defined in this file and is presumably a concatenation --
    confirm. *)
let minus x y =
  (* The first group is greedy, hence the occurrence of [y] that gets
     isolated between the two groups is the rightmost one. *)
  let pattern = "\\(.*\\)" ^ y ^ "\\(.*\\)" in
  (* extract_groups signals the absence of a match with Not_found; treat it
     like an empty list of groups so that [x] is returned untouched. *)
  let parts =
    try extract_groups (Str.regexp pattern) x
    with Not_found -> []
  in
  match parts with
  | [] -> x
  | l -> String.big (^) l
;;
(** Grep on string lists: only strings matching the pattern are selected.
    The pattern [e] is compiled once with [Str.regexp]; a string is kept as
    soon as one of its substrings matches (see [Bool.match_whole]).

    Examples:
    {[# grep "[0-9]+" ["aa"; "bb"; "c8"; "dd"; "1e"] ;;
  : string list = ["c8"; "1e"] ]} *)
let grep (e:string) (sl:string list) : string list =
  let compiled = Str.regexp e in
  let selected line = Bool.match_whole compiled line in
  List.filter selected sl
;;
(** Check if a string can be used as an identifier: a letter followed by any
    number of letters, digits and underscores. When [allow_dash] is [true]
    (default [false]) dashes are accepted as well after the first letter. *)
let wellFormedName ?(allow_dash=false) =
  (* Str gives backslash no special meaning inside a bracket expression: a
     literal dash is obtained by putting it last in the set. Writing it as an
     escaped dash would also admit backslashes in names. *)
  if allow_dash then
    Bool.match_string "^[a-zA-Z][a-zA-Z0-9_-]*$"
  else
    Bool.match_string "^[a-zA-Z][a-zA-Z0-9_]*$";;
end