Monday, February 23, 2015

(* This is my second post!

I promised to present a Modula-2 program that properly processes mixed end-of-line characters in text files. Text files contain end-of-line characters and possibly horizontal tab characters but no other control characters.

Instead of simply modifying Crunch0, I decided to post an entirely new program.

CountLinesChars uses RawIO.Read and StreamFile. It disallows redirection from stdin. Instead it counts lines and characters in input text files whose names are specified as command line arguments.

This program is supposed to give a correct count regardless of the combination of end-of-line characters, where eol = cr [ lf ] | lf . Like Crunch0, it writes to stdout. *)

MODULE CountLinesChars;

(* Uses RawIO.Read and StreamFile *)

(* Outputs count of the number of lines and characters in each input text file *)
(* whose name is specified on the command line. *)

(* Outputs count of the total lines and characters in all the input text files. *)

(* input: text files only. *)
(* output: always to stdout. *)
(* stdin: only from a filename argument on the command-line. *)

(* Text files contain end-of-line characters and possibly horizontal tab *)
(* characters but no other control characters. *)

(* 1. input end-of-line characters are counted in the character count (nc) *)
(* input eol = cr [ lf ] | lf . *)
(* so cr lf = 2 in the character count (nc) *)
(* but eol only counts as 1 in the line count (nl) *)

(* 2. Last line of file is not counted in line count if not terminated in eol. *)

(* Tested on: *)
(* ISO m2: p1 compiler version 9.2/Apple Mac OS X 10.7.5 *)
(* Not tested on gm2 or other ISO m2 systems. *)

(* License: This software is released under the following M.I.T. License:

The MIT License (MIT)

Copyright (c) 2015 Carl M. Glassberg

Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. *)

  IMPORT StreamFile, RawIO, TextIO, IOResult, StdChans, ProgramArgs, STextIO, SWholeIO;

  (* Module-level constants and state shared by the procedures below. *)
  CONST
    EOL = 36C; (* CHR(30) = 1EX *)
    (* NOTE(review): EOL is declared but never referenced in this module. *)

    cr = 15C;  (* CHR(13) = 0DX, carriage return *)
    lf = 12C;  (* CHR(10) = 0AX, linefeed *)

  VAR
    inf : StreamFile.ChanId;          (* channel of the input file currently open *)
    ifn : ARRAY [0 .. 79] OF CHAR;    (* current input file name, read from the argument channel *)
    result : StreamFile.OpenResults;  (* outcome of the most recent StreamFile.Open *)

    n : CARDINAL; (* nr. of input files opened successfully so far *)

    (* per-file counters: reset to 0 before each file is processed *)
    nl : CARDINAL; (* line count *)
    nc : CARDINAL; (* character count *)

    (* running totals: nl and nc are added in after each file *)
    ml : CARDINAL; (* total line count *)
    mc : CARDINAL; (* total character count *)

    ch : CHAR; (* most recently read character; set to 0C by read_char at end of input *)

  PROCEDURE OutputCount(ifn : ARRAY OF CHAR);
  BEGIN
    SWholeIO.WriteCard(nl, 8);
    SWholeIO.WriteCard(nc, 8);
    STextIO.WriteString(" ");
    STextIO.WriteString(ifn);
    STextIO.WriteLn
  END OutputCount;

  PROCEDURE OutputTotal;
  BEGIN
    SWholeIO.WriteCard(ml, 8);
    SWholeIO.WriteCard(mc, 8);
    STextIO.WriteString(" total");
    STextIO.WriteLn
  END OutputTotal;

  PROCEDURE WriteErrMsg (str : ARRAY OF CHAR);
    (* output to stderr *)
  BEGIN
    TextIO.WriteString(StdChans.StdErrChan(), str)
  END WriteErrMsg;

  PROCEDURE WriteErrLn;
    (* output to stderr *)
  BEGIN
    TextIO.WriteLn(StdChans.StdErrChan());
  END WriteErrLn;

  PROCEDURE done () : BOOLEAN;
    (* inspect input from named file; return TRUE if not at end-of-file *)
  BEGIN
    RETURN (IOResult.ReadResult(inf) # IOResult.endOfInput)
  END done;

  PROCEDURE read_char(VAR ch : CHAR);
    (* return nul character CHR(0) at end-of-file; no end-of-line processing *)
  BEGIN
    RawIO.Read(inf, ch);
    IF NOT(done()) THEN
      ch := 0C
    END (* if *)
  END read_char;
         
  PROCEDURE Process;
  BEGIN
    read_char(ch);
    WHILE done() & (ch # 0C) DO
      INC(nc);

      IF (ch = cr) THEN (* end-of-line *)
        INC(nl);
        read_char(ch);
        INC(nc);
        IF (ch = lf) THEN
          read_char(ch)
        END (* if *)

      ELSIF (ch = lf) THEN (* end-of-line *)
        INC(nl);
        read_char(ch)

      ELSE (* not end-of-line *)
        read_char(ch)
      END (* if *);
                       
    END (* while *)
  END Process;

  PROCEDURE Help;
  BEGIN
    WriteErrLn;
    WriteErrMsg('Usage: "CountLinesChars" ifn { ifn } ');
    WriteErrLn;
    WriteErrMsg('   or  "CountLinesChars" ifn { ifn } ">" ofn');
    WriteErrLn
  END Help;

BEGIN
  (* Main program: treat every command-line argument as the name of an *)
  (* input file, print its line and character counts, and print a total *)
  (* line when more than one file was processed. Without arguments the *)
  (* usage text is written to stderr. *)

  ProgramArgs.NextArg();
  (* first argument is tool name so skip. *)

  IF NOT ProgramArgs.IsArgPresent() THEN
    Help
  ELSE
    (* command-line arguments are named input files. *)
    ml := 0; mc := 0;
    n := 0;
    LOOP
      TextIO.ReadToken(ProgramArgs.ArgChan(), ifn);
      StreamFile.Open(inf, ifn, StreamFile.read + StreamFile.raw, result);
      IF result # StreamFile.opened THEN
        (* Report the failure and stop; remaining arguments are skipped. *)
        WriteErrLn;
        WriteErrMsg('Could not open input file: ');
        WriteErrMsg(ifn);
        WriteErrLn; (* terminate the message so later output starts on a fresh line *)
        EXIT
      END (* if *);

      (* per-file counters start from zero for each file *)
      nc := 0; nl := 0;
      INC(n);

      Process;
      OutputCount(ifn);

      (* fold this file's counts into the running totals *)
      INC(ml, nl); INC(mc, nc);

      StreamFile.Close(inf);
      ProgramArgs.NextArg();
      IF NOT ProgramArgs.IsArgPresent() THEN
        EXIT
      END (* if *)
    END (* loop *);

    (* a total is only printed when at least two files were counted *)
    IF n > 1 THEN OutputTotal END (* if *)
  END (* if *)
END CountLinesChars .


    (* Welcome to my first blog posting at my new blog site. 

    This blog will focus on Modula-2 programming. I hope later to post examples showing 
    how  to call assembly language from Modula-2.

Modula-2 systems that I plan to use in the example m2 programs I post to this site:   
1.) p1 Modula-2, which runs on the Apple Macintosh under OS X.   
2.) ADW Modula-2, which runs under Microsoft Windows.  
3.) Both systems support 32-bit and 64-bit Intel platforms. 

    Crunch0 is an example m2 program taken from the Google Code site.

David Egan Evans has pointed out that my ISO m2 text filters posted to that site do not 
properly handle mixed end-of-line control characters.

In fact, one of my original goals was to process end-of-lines (eol { eol }), where eol is 
defined by the EBNF rule, eol = cr [ lf ] | lf .
But I failed to fulfill that objective---those text filters only work for the system end-of-line 
character, the line-feed (lf) in the case of p1 Modula2. Carriage return (cr) is not processed  
correctly.
In my next blog posting, I will post a modified version of Crunch0, called Crunch1, which 
does properly process lines terminated by any combination of cr and lf, using the above 
definition of eol.

Carl Glassberg *)

 
MODULE Crunch0;
 
 (* Summary: *)
 
 (* Text filter, ISO Modula-2. *)
 (* input: text files only. *)
 (* output is always to stdout. *)


 (* Limitations: on the p1 system, Crunch0 works correctly only *)
 (* on input text files whose lines are all terminated by the *)
 (* system end-of-line (\n = lf, newline, 12C = CHR(10) = 0AX). *) 

 (* Text files contain end-of-line characters and possibly horizontal tab characters but no other control characters. *)
 
 (* Details: *)
 
 (* 1. Trim: delete leading and trailing space--- delete sp (40C), nbsp (240C)
       and ht (11C) at beginning and end of a line. *)
 
 (* 2. Squeeze: replace adjacent sp, nbsp and ht within a line with a single 40C *)
 
 (* 3. Delete non end-of-line control characters in the range 0C .. 37C + 177C
       and delete control characters 200C .. 237C *)
 
 (* 4. Crush: replace adjacent blank lines by a single blank line. *)

 
 (* License: This software is released under the following M.I.T. License:
 
 The MIT License (MIT)
 
 Copyright (c) 2015 Carl M. Glassberg
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of this
 software and associated documentation files (the "Software"), to deal in the Software
 without restriction, including without limitation the rights to use, copy, modify, merge,
 publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
 to whom the Software is furnished to do so, subject to the following conditions:
 
 The above copyright notice and this permission notice shall be included in all copies or
 substantial portions of the Software.
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
 FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE. *) 
 
 (* Tested on:
      p1 compiler version 9.1O/Apple Mac OS X 10.7.5 
      Not tested on gm2 or other ISO m2 systems.
 *)
 
   IMPORT StreamFile, TextIO, IOResult, StdChans, ProgramArgs, STextIO, SIOResult;
 
   (* Module-level constants, procedure types and shared state. *)
   CONST
     EOL = 36C; (* CHR(30) = 1EX *)
     (* EOL is the in-band marker that read_char0 and read_char1 store in ch *)
     (* when the read result is endOfLine; write_char0 turns it into WriteLn. *)
     ht = 11C; (* CHR(9) = 9X *)
     sp = 40C; (* CHR(32) = 20X *)
     nbsp = 240C; (* CHR(160) = 0A0X *)
     del = 177C; (* CHR(127) = 7FX *)
 
   TYPE
     (* signatures of the input routines that Process calls indirectly *)
     DoneFuncType = PROCEDURE () : BOOLEAN;
     ReadProcType = PROCEDURE (VAR CHAR);
  
   VAR
     (* done and read are assigned by the module body: done1/read_char1 for *)
     (* named input files, done0/read_char0 for stdin. *)
     done : DoneFuncType;
     read : ReadProcType;
     inf : StreamFile.ChanId;         (* channel of the named input file currently open *)
     ifn : ARRAY[0..79] OF CHAR;      (* current input file name, read from the argument channel *)
     result : StreamFile.OpenResults; (* outcome of the most recent StreamFile.Open *)
     ch : CHAR;                       (* most recently read character; EOL or 0C are substituted by the readers *)
 
 
   PROCEDURE done0() : BOOLEAN;
     (* stdin variant: TRUE as long as the read result of the default *)
     (* input channel is not endOfInput (FALSE signals end-of-file). *)
   BEGIN
     RETURN NOT (SIOResult.ReadResult() = SIOResult.endOfInput)
   END done0;
 
 
   PROCEDURE eol0() : BOOLEAN;
     (* stdin variant: TRUE when the read result of the default input *)
     (* channel is endOfLine. *)
   BEGIN
     RETURN (SIOResult.endOfLine = SIOResult.ReadResult())
   END eol0;
 
 
   PROCEDURE read_char0(VAR ch : CHAR);
     (* Read one character from stdin. *)
     (* If the read result is endOfLine, ch becomes the EOL marker and the *)
     (* rest of the line is skipped with SkipLine; if it is endOfInput, *)
     (* ch becomes the nul character 0C. *)
     VAR
       res : SIOResult.ReadResults;
   BEGIN
     STextIO.ReadChar(ch);
     res := SIOResult.ReadResult();
     IF res = SIOResult.endOfLine THEN
       ch := EOL;
       STextIO.SkipLine
     ELSIF res = SIOResult.endOfInput THEN
       ch := 0C
     END (* if *)
   END read_char0;
 
 
   PROCEDURE write_char0(ch : CHAR);
     (* Filter one character to stdout: *)
     (*   sp .. 176C and 241C .. 377C are written unchanged; *)
     (*   the EOL marker becomes a WriteLn; *)
     (*   ht and nbsp are written as a single sp; *)
     (*   every other character is dropped. *)
   BEGIN
     CASE ch OF
       sp .. 176C,
       241C .. 377C :
         (* printable ASCII including sp, printable Latin1 without nbsp *)
         STextIO.WriteChar(ch)
     | EOL :
         STextIO.WriteLn
     | ht, nbsp :
         STextIO.WriteChar(sp)
     ELSE
       (* remaining control characters are deleted *)
     END (* case *)
   END write_char0;
 
 
   PROCEDURE WriteErrMsg(str: ARRAY OF CHAR);
     (* Write str to the standard error channel. Only WriteString is *)
     (* called; use WriteErrLn to end the line. *)
     VAR
       errChan : StdChans.ChanId;
   BEGIN
     errChan := StdChans.StdErrChan();
     TextIO.WriteString(errChan, str)
   END WriteErrMsg;
 
 
   PROCEDURE WriteErrLn;
     (* End the current line on the standard error channel. *)
     VAR
       errChan : StdChans.ChanId;
   BEGIN
     errChan := StdChans.StdErrChan();
     TextIO.WriteLn(errChan)
   END WriteErrLn;
 
 
   PROCEDURE done1() : BOOLEAN;
     (* named-file variant: TRUE as long as the read result of channel *)
     (* inf is not endOfInput (FALSE signals end-of-file). *)
   BEGIN
     RETURN NOT (IOResult.ReadResult(inf) = IOResult.endOfInput)
   END done1;
 
 
   PROCEDURE eol1() : BOOLEAN;
     (* named-file variant: TRUE when the read result of channel inf is *)
     (* endOfLine. *)
   BEGIN
     RETURN (IOResult.endOfLine = IOResult.ReadResult(inf))
   END eol1;
 
 
   PROCEDURE read_char1(VAR ch : CHAR);
     (* Read one character from the named input file open on inf. *)
     (* If the read result is endOfLine, ch becomes the EOL marker and the *)
     (* rest of the line is skipped with SkipLine; if it is endOfInput, *)
     (* ch becomes the nul character 0C. *)
     VAR
       res : IOResult.ReadResults;
   BEGIN
     TextIO.ReadChar(inf, ch);
     res := IOResult.ReadResult(inf);
     IF res = IOResult.endOfLine THEN
       ch := EOL;
       TextIO.SkipLine(inf)
     ELSIF res = IOResult.endOfInput THEN
       ch := 0C
     END (* if *)
   END read_char1;
 
 
   PROCEDURE Process;
     (* Filter the current input to stdout. Input is obtained through the *)
     (* procedure variables read and done, which the module body points at *)
     (* either the stdin or the named-file routines; output always goes *)
     (* through write_char0 / STextIO.WriteLn. *)
     (* *)
     (* For every line that contains something other than sp, nbsp, ht: *)
     (*   - leading sp/nbsp/ht are skipped; *)
     (*   - each run of sp/nbsp/ht inside the line is written as one sp; *)
     (*   - trailing sp/nbsp/ht are not written, whether the line ends in *)
     (*     EOL or at end of input; *)
     (*   - other characters are passed to write_char0, which drops *)
     (*     control characters. *)
     (* Lines that are empty or hold only sp/nbsp/ht are skipped without *)
     (* producing any output. *)
   BEGIN
     read(ch);
     WHILE (done()) DO
       REPEAT
         WHILE (ch = sp) OR (ch = nbsp) OR (ch = ht) DO
           read(ch)
         END (* while *);
         IF (ch = EOL) THEN read(ch) END (* if *)
       UNTIL (ch # EOL) & (ch # sp) & (ch # nbsp) & (ch # ht);
       (* skip 1 or more adjacent blank lines *)
       (* here ch is the first significant character of a line, or 0C at *)
       (* end of input (the readers store 0C there, which ends the REPEAT) *)
 
       WHILE (done()) & (ch # EOL) DO
         write_char0(ch);
         REPEAT
           read(ch);
           IF ((sp < ch) & (ch < del)) THEN
             (* non-sp ASCII print character: *)
             write_char0(ch)
           ELSIF (nbsp < ch) & (ch <= 377C) THEN
             (* non-nbsp Latin-1 (print) character: *)
             write_char0(ch)
           END (* if *);
         UNTIL (ch = sp) OR (ch = nbsp) OR (ch = ht) OR (ch = EOL) OR NOT(done());
         (* output non-sp and non-nbsp print characters *)
 
         IF (done()) & (ch # EOL) THEN
           (* squeeze intra-line sp, nbsp, ht (& delete trailing sp, nbsp, ht): *)
           read(ch);
           WHILE (ch = sp) OR (ch = nbsp) OR (ch = ht) DO
             read(ch)
           END (* while *);
           (* Write the separating sp only when more of the line follows. *)
           (* The done() test matters at end of input: there ch is 0C, not *)
           (* EOL, and without the test an unterminated last line ending *)
           (* in white space would get a trailing sp. *)
           IF (done()) & (ch # EOL) THEN write_char0(sp) END (* if *)
           (* do not add a trailing sp (40C). *)
           (* note: a trailing sp might be useful for line unfolding. *)
         END (* if *);
       END (* while *);
       (* end-of-file or end-of-line *)
       IF ch = EOL THEN STextIO.WriteLn END (* if *);
 
       read(ch)
     END (* while *);
     (* end-of-file *)
   END Process;
 
 
 BEGIN
   (* Main program: filter each file named on the command line to stdout; *)
   (* without arguments, filter stdin to stdout. *)

   ProgramArgs.NextArg(); 
   (* first argument is tool name so skip. *)
 
   IF NOT ProgramArgs.IsArgPresent() THEN
     (* stdin/stdout with command-line redirection: *)
     done := done0;
     read := read_char0;
     Process
   ELSE
     (* command-line arguments are named input files: *)
     LOOP
       TextIO.ReadToken(ProgramArgs.ArgChan(), ifn);
       StreamFile.Open(inf, ifn, StreamFile.read, result);
       IF result # StreamFile.opened THEN
         (* Report the failure and stop; remaining arguments are skipped. *)
         WriteErrLn;
         WriteErrMsg('Could not open input file: ');
         WriteErrMsg(ifn);
         WriteErrLn; (* terminate the message so later output starts on a fresh line *)
         EXIT
       END (* if *);
 
       (* route Process through the named-file input routines *)
       done := done1;
       read := read_char1;
       Process;
 
       StreamFile.Close(inf);
       ProgramArgs.NextArg();
       IF NOT ProgramArgs.IsArgPresent() THEN EXIT END (* if *)
     END (* loop *)
   END (* if *)
 
 END Crunch0.