Exercises
Filtering Comments Out
Comments in Objective CAML are hierarchical. We can thus comment away
sections of text, including those containing comments. A comment
starts with characters (* and finishes with *). Here's
an example:
(* comment spread
over several
lines *)
let succ x = (* successor function *)
x + 1;;
(* level 1 commented text
let old_succ y = (* level 2 successor function level 2 *)
y +1;;
level 1 *)
succ 2;;
The aim of this exercise is to create a new text without comments. You
are free to choose whatever lexical analysis tool you wish.
-
Write a lexer able to recognize Objective CAML
comments. These start with a
(* and end with a *).
Your lexer should ensure
comments are balanced, that is to say the number of comment
openings equals the number of comment closings.
We are not interested in other constructions in the language which
may contain characters (* and *).
(** fichier comment1.mll **)
{
let ignore_lex lb = ignore (Lexing.lexeme lb)
let traite_normal = ignore_lex
let traite_comment = ignore_lex
exception Commentaires_mal_balances
}
rule normal = parse
"(*" { ignore_lex lexbuf ; commentaire lexbuf ; normal lexbuf }
| "*)" { raise Commentaires_mal_balances }
| _ { traite_normal lexbuf ; normal lexbuf }
| eof { () }
and commentaire = parse
"(*" { ignore_lex lexbuf ; commentaire lexbuf ; commentaire lexbuf }
| "*)" { ignore_lex lexbuf }
| _ { traite_comment lexbuf ; commentaire lexbuf }
| eof { raise Commentaires_mal_balances }
- Write a program which takes a file, reads it,
filters comments away and writes a new file with the remaining text.
(** fichier comment2.mll **)
{
let ignore_lex lb = ignore (Lexing.lexeme lb)
let sortie = ref stdout
let init_sortie f = sortie := f
let traite_normal lb = output_string !sortie (Lexing.lexeme lb)
let traite_comment = ignore_lex
exception Commentaires_mal_balances
}
rule normal = parse
"(*" { ignore_lex lexbuf ; commentaire lexbuf ; normal lexbuf }
| "*)" { raise Commentaires_mal_balances }
| _ { traite_normal lexbuf ; normal lexbuf }
| eof { () }
and commentaire = parse
"(*" { ignore_lex lexbuf ; commentaire lexbuf ; commentaire lexbuf }
| "*)" { ignore_lex lexbuf }
| _ { traite_comment lexbuf ; commentaire lexbuf }
| eof { raise Commentaires_mal_balances }
{
let decommente src dest =
let file_in = open_in src in
let lb = Lexing.from_channel file_in in
let file_out = open_out dest in
init_sortie file_out ;
normal lb ;
close_in file_in ;
close_out file_out ;;
let usage () =
print_string "comment2 filein fileout";
print_newline() ;;
let main () =
if Array.length (Sys.argv) <> 3 then usage ()
else decommente Sys.argv.(1) Sys.argv.(2) ;;
main ();;
}
- In Objective CAML character strings may contain any character,
even the sequences
(* and *). For example, character
string "what(*ever te*)xt" should not be considered a
comment. Modify your lexer to consider
character strings.
(** fichier comment3.mll **)
{
let ignore_lex lb = ignore (Lexing.lexeme lb)
let sortie = ref stdout
let init_sortie f = sortie := f
let traite_normal lb = output_string !sortie (Lexing.lexeme lb)
let traite_comment = ignore_lex
exception Commentaires_mal_balances
}
rule normal = parse
"(*" { ignore_lex lexbuf ; commentaire lexbuf ; normal lexbuf }
| "*)" { raise Commentaires_mal_balances }
| '"' { traite_normal lexbuf ; chaine lexbuf ; normal lexbuf }
| _ { traite_normal lexbuf ; normal lexbuf }
| eof { () }
and commentaire = parse
"(*" { ignore_lex lexbuf ; commentaire lexbuf ; commentaire lexbuf }
| "*)" { ignore_lex lexbuf }
| _ { traite_comment lexbuf ; commentaire lexbuf }
| eof { raise Commentaires_mal_balances }
and chaine = parse
'"' { traite_normal lexbuf ; () }
| "\\\"" { traite_normal lexbuf ; chaine lexbuf }
| _ { traite_normal lexbuf ; chaine lexbuf }
- Use this new lexer to remove comments from an
Objective CAML program .
(** fichier comment4.mll **)
{
let ignore_lex lb = ignore (Lexing.lexeme lb)
let sortie = ref stdout
let init_sortie f = sortie := f
let traite_normal lb = output_string !sortie (Lexing.lexeme lb)
let traite_comment = ignore_lex
exception Commentaires_mal_balances
}
rule normal = parse
"(*" { ignore_lex lexbuf ; commentaire lexbuf ; normal lexbuf }
| "*)" { raise Commentaires_mal_balances }
| '"' { traite_normal lexbuf ; chaine lexbuf ; normal lexbuf }
| _ { traite_normal lexbuf ; normal lexbuf }
| eof { () }
and commentaire = parse
"(*" { ignore_lex lexbuf ; commentaire lexbuf ; commentaire lexbuf }
| "*)" { ignore_lex lexbuf }
| _ { traite_comment lexbuf ; commentaire lexbuf }
| eof { raise Commentaires_mal_balances }
and chaine = parse
'"' { traite_normal lexbuf ; () }
| "\\\"" { traite_normal lexbuf ; chaine lexbuf }
| _ { traite_normal lexbuf ; chaine lexbuf }
{
let decommente src dest =
let file_in = open_in src in
let lb = Lexing.from_channel file_in in
let file_out = open_out dest in
init_sortie file_out ;
normal lb ;
close_in file_in ;
close_out file_out ;;
let usage () =
print_string "comment2 filein fileout";
print_newline() ;;
let main () =
if Array.length (Sys.argv) <> 3 then usage ()
else decommente Sys.argv.(1) Sys.argv.(2) ;;
main ();;
}
Evaluator
We will use ocamlyacc to implement an expression
evaluator. The idea is to perform the evaluation of
expressions directly in the grammar rules.
We choose a (completely parenthesized) prefix arithmetic expression
language with variable arity operators. For example, expression
(ADD e1 e2 .. en) is equivalent to e1 + e2 + .. + en. Plus
and times operators are right-associative and subtraction and
division are left-associative.
- Define in file opn_parser.mly the
parsing and evaluation rules for an expression.
%{
let rec app_right f xs =
match xs with
[x] -> x
| x::xs -> f x (app_right f xs)
| _ -> failwith"missing argument" ;;
let rec app_left f xs =
match xs with
[x] -> x
| x1::x2::xs -> app_left f ((f x1 x2)::xs)
| _ -> failwith"missing argument" ;;
let t = Hashtbl.create 3 ;;
(
Hashtbl.add t "ADD" (app_right (+.));
Hashtbl.add t "SUB" (app_left (-.));
Hashtbl.add t "MUL" (app_right ( *.));
Hashtbl.add t "DIV" (app_left (/.))
) ;;
let apply o vs =
try
(Hashtbl.find t o) vs
with
Not_found -> (Printf.eprintf"Unknown operator %s\n" o; exit(1)) ;;
%}
%token Lpar Rpar
%token <float> Num
%token <string> Atom
%start term
%type <float> term
%type <float list> terms
%%
term :
Num { $1 }
| Lpar Atom terms Rpar
{ (apply $2 $3) }
;
terms :
term { [$1] }
| term terms { $1::$2 }
;
%%
- Define in file opn_lexer.mll the
lexical analysis of expressions.
{
open Opn_parser
}
rule lexer = parse
[' ' '\n'] { lexer lexbuf }
| '(' { Lpar }
| ')' { Rpar }
| '-'?['0'-'9']*'.'?['0'-'9']*
{ Num (float_of_string (Lexing.lexeme lexbuf)) }
| ['A'-'z']+ { Atom (Lexing.lexeme lexbuf) }
- Write a simple main program opn which reads
a line from standard input containing an expression and prints the
result of evaluating the expression.
open Opn_lexer ;;
open Opn_parser ;;
Printf.printf"? "; flush stdout;
let buf = Lexing.from_string (input_line stdin) in
Printf.printf "= %f\n" (Opn_parser.term Opn_lexer.lexer buf) ;;