You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

124 lines
4.7 KiB
Scheme

;;; freq.ss
;;; Copyright (C) 1996 R. Kent Dybvig
;;; from "The Scheme Programming Language, 2ed" by R. Kent Dybvig
;;; Permission is hereby granted, free of charge, to any person obtaining a
;;; copy of this software and associated documentation files (the "Software"),
;;; to deal in the Software without restriction, including without limitation
;;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
;;; and/or sell copies of the Software, and to permit persons to whom the
;;; Software is furnished to do so, subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included in
;;; all copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
;;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
;;; DEALINGS IN THE SOFTWARE.
;;; If the next character on p is a letter, get-word reads a word
;;; from p and returns it in a string. If the character is not a
;;; letter, get-word returns the character (on eof, the eof-object).
(define get-word
(lambda (p)
(let ((c (read-char p)))
(if (eq? (char-type c) 'letter)
(list->string
(let loop ((c c))
(cons c
(if (memq (char-type (peek-char p)) '(letter digit))
(loop (read-char p))
'()))))
c))))
;;; char-type tests for the eof-object first, since the eof-object
;;; may not be a valid argument to char-alphabetic? or char-numeric?
;;; It returns the eof-object, the symbol letter, the symbol digit,
;;; or the argument itself if it is not a letter or digit.
(define char-type
(lambda (c)
(cond
((eof-object? c) c)
((char-alphabetic? c) 'letter)
((char-numeric? c) 'digit)
(else c))))
;;; Trees are represented as vectors with four fields: word, left,
;;; right, and count. Only one field, word, is initialized by an
;;; argument to the constructor procedure make-tree. The remaining
;;; fields are explicitly initialized and changed by subsequent
;;; operations. Most Scheme systems provide structure definition
;;; facilities that automate creation of structure manipulation
;;; procedures, but we simply define the procedures by hand here.
(define make-tree
(lambda (word)
(vector word '() '() 1)))
(define tree-word (lambda (tree) (vector-ref tree 0)))
(define tree-left (lambda (tree) (vector-ref tree 1)))
(define set-tree-left!
(lambda (tree new-left)
(vector-set! tree 1 new-left)))
(define tree-right (lambda (tree) (vector-ref tree 2)))
(define set-tree-right!
(lambda (tree new-right)
(vector-set! tree 2 new-right)))
(define tree-count (lambda (tree) (vector-ref tree 3)))
(define set-tree-count!
(lambda (tree new-count)
(vector-set! tree 3 new-count)))
;;; If the word already exists in the tree, tree increments its
;;; count. Otherwise, a new tree node is created and put into the
;;; tree. In any case, the new or modified tree is returned.
(define tree
(lambda (node word)
(cond
((null? node) (make-tree word))
((string=? word (tree-word node))
(set-tree-count! node (+ (tree-count node) 1))
node)
((string<? word (tree-word node))
(set-tree-left! node (tree (tree-left node) word))
node)
(else
(set-tree-right! node (tree (tree-right node) word))
node))))
;;; tree-print prints the tree in "in-order," i.e., left subtree,
;;; then node, then right subtree. For each word, the count and the
;;; word are printed on a single line.
(define tree-print
(lambda (node p)
(if (not (null? node))
(begin
(tree-print (tree-left node) p)
(write (tree-count node) p)
(write-char #\space p)
(display (tree-word node) p)
(newline p)
(tree-print (tree-right node) p)))))
;;; frequency is the driver routine. It opens the files, reads the
;;; words, and enters them into the tree. When the input port
;;; reaches end-of-file, it prints the tree and closes the ports.
(define frequency
(lambda (infn outfn)
(let ((ip (open-input-file infn))
(op (open-output-file outfn)))
(let loop ((root '()))
(let ((w (get-word ip)))
(cond
((eof-object? w) (tree-print root op))
((string? w) (loop (tree root w)))
(else (loop root)))))
(close-input-port ip)
(close-output-port op))))