#include "BUCASCS320.hats"
staload "libc/SATS/stdlib.sats"
typedef cmp (elt: t@ype) = (elt, elt) -<fun> int
typedef natLt (n: int) = [i:nat | i < n] int (i)
extern fun{elt:t@ype} bsearch_cloref {n:nat}
(f: natLt n -<cloref> elt, n: size_t n, v: elt, cmp: cmp elt)
:<> [i:int | ~1 <= i; i < n] int i
implement{elt} bsearch_cloref {n}
(f, n, v, cmp) = loop (0, n-1) where {
typedef res_t = [i:int | ~1 <= i; i < n] int i
fun loop
{i,j:int | 0 <= i; i <= j+1; j+1 <= n} .<j-i+1>.
(lb: int i, ub: int j):<cloref> res_t =
if lb <= ub then let
val m = lb + (ub - lb) / 2; val sgn = cmp (v, f (m))
in
if sgn >= 0 then loop (m+1, ub) else loop (lb, m-1)
end else ub
val n = int1_of_size1 (n)
}
extern fun wordbeg_get {n:nat}
(wordarr: array (string, n), n: size_t n, word: string): int
implement wordbeg_get (wordarr, n, word) = let
fn cmp (s1: string, s2: string):<> int = let
val sgn = compare (s1, s2) in if sgn > 0 then 1 else ~1
end in
bsearch_cloref (lam (i) => $effmask_ref (wordarr[i]), n, word, cmp)
end
extern fun wordend_get {n:nat}
(wordarr: array (string, n), n: size_t n, word: string): int
implement wordend_get (wordarr, n, word) = let
fn cmp (s1: string, s2: string):<> int = let
val s1 = string1_of_string s1
val s2 = string1_of_string s2
fun loop {n1,n2:nat}
{i:nat | i <= n1; i <= n2} .<n1-i>.
(s1: string n1, s2: string n2, i: size_t i):<> Sgn =
if string_is_at_end (s1, i) then
if string_is_at_end (s2, i) then 1
else let
val c2 = s2[i]
in
if char_isalpha (c2) then ~1 else 1
end
else
if string_is_at_end (s2, i) then 1
else let
val sgn = compare (s1[i], s2[i])
in
if sgn <> 0 then sgn else loop (s1, s2, i+1)
end
in
loop (s1, s2, 0)
end in
bsearch_cloref (lam (i) => $effmask_ref (wordarr[i]), n, word, cmp)
end
extern fun wordcnt_get {n:nat}
(wordarr: array (string, n), n: size_t n, word: string): int
implement wordcnt_get (wordarr, n, word) = let
val n0 = wordbeg_get (wordarr, n, word) and n1 = wordend_get (wordarr, n, word)
in
n1 - n0
end
extern fun theText_initialize (
buf: &string? >> string n
, nbuf: &size_t? >> size_t n
, wordarr: &array? >> array (string, n1)
, nwordarr: &size_t? >> size_t n1
) : #[n:nat;n1:nat] void = "theText_initialize"
fn fprint_first_word
(out: FILEref, s: string): void = loop (0) where {
val [n:int] s = string1_of_string (s)
fun loop {i:nat | i <= n} (i: size_t i):<cloref1> void =
if string_isnot_at_end (s, i) then let
val c = s[i]
in
if char_isalpha (c) then (fprint_char (out, s[i]); loop (i+1))
end }
fn first_word_get (s: string): string = let
val [n:int] s = string1_of_string (s)
fun loop {i:nat | i <= n} (i: size_t i):<cloref1> sizeLte n =
if string_is_at_end (s, i) then i else
(if char_isalpha (s[i]) then loop (i+1) else i)
in
string_make_substring (s, 0, loop 0)
end
local
staload "funbinheap.dats"
typedef elt = @(
int, string
)
val cmp = lam (x1: elt, x2: elt): Sgn =<cloref> compare (x2.0, x1.0)
fn wordcounthp_build {n:nat}
(wordarr: array (string, n), n: size_t n)
: fbinhp (elt) = loop (res, 0, ntot) where {
typedef res_t = fbinhp (elt)
val res = funbinheap_empty<> {elt} (); var ntot: int = 0
fun loop {i:nat} (res: res_t, i: size_t i, ntot: &int):<cloref1> res_t =
if i < n then let
val wrd = first_word_get (wordarr[i])
val cnt = wordcnt_get (wordarr, n, wrd)
val res = funbinheap_insert<elt> (res, @(cnt, wrd), cmp)
val cnt = int1_of_int (cnt)
val () = assert (cnt >= 1)
val i = i + cnt
val () = ntot := ntot + 1
in
loop (res, i, ntot)
end else let
in
res end
}
in
fn fprint_used_words {n:nat} (
out: FILEref, wordarr: array (string, n), n: size_t n, nword: int
) : void = let
val hp = wordcounthp_build (wordarr, n)
val nhp = funbinheap_size (hp)
val () = fprintf (out, "The total number of words used is [%i].\n", @(nhp))
val () = loop (hp, nhp, 0) where {
fun loop {nhp,i:nat}
(res: fbinhp (elt, nhp), nhp: int nhp, i: int i):<cloref1> void =
if i < nword then begin
if nhp > 0 then let
var x: elt val res = funbinheap_delmin<elt> (res, x, cmp)
val () = fprintf (out, "%s (%i)\n", @(x.1, x.0))
in
loop (res, nhp-1, i+1)
end else begin
end end else begin
end }
in
end
end
dynload "funbinheap.dats"
implement main () = () where {
var theTextBuf: string var nbuf: size_t var theTextWordArr: array (string, 0) var nwordarr: size_t val () = theText_initialize (theTextBuf, nbuf, theTextWordArr, nwordarr)
val theTextWordArr = theTextWordArr
val () = begin
print "theTextBuf_sz = "; print nbuf; print_newline ();
print "theTextWordArr_sz = "; print nwordarr; print_newline ();
end
val () = () where {
val (vbox pf_arr | p_arr) = array_get_view_ptr {string} (theTextWordArr)
val () = qsort {string} (!p_arr, nwordarr, sizeof<string>, lam (s1, s2) => compare (s1, s2))
} val () = (print "sorting is finished.\n"; print_newline ())
val () = fprint_used_words (stdout_ref, theTextWordArr, nwordarr, 500)
}
%{$
#define CBUFSZ (16 * 1024 * 1024)
char theTextBuf[CBUFSZ] ;
#define WBUFSZ (1024 * 1024)
char *theTextWordArr[WBUFSZ] ;
ats_void_type
theText_initialize (
ats_ref_type p_buf
, ats_ref_type p_nbuf
, ats_ref_type p_wordarr
, ats_ref_type p_nwordarr
) {
int c ;
int iw ;
size_t i ;
char *s ;
size_t i1 ;
char **s1 ;
iw = 0 ;
s = theTextBuf ;
i = 0 ;
s1 = theTextWordArr ;
i1 = 0 ;
while (1) {
c = fgetc(stdin) ;
if (c == EOF) break ;
*s = tolower(c) ;
//
if (isalpha(c)) {
if (iw == 0) { iw = 1 ; *s1++ = s ; i1++ ; }
} else {
iw = 0 ;
} /* end of [if] */
//
s++ ; i++ ;
} /* end of [while] */
*s = '\000' ;
*((char**)p_buf) = theTextBuf ;
*((size_t*)p_nbuf) = i ;
*((char***)p_wordarr) = theTextWordArr ;
*((size_t*)p_nwordarr) = i1 ;
if (i >= CBUFSZ) {
fprintf (stderr, "exit: too many chars for processing\n") ; exit(1) ;
} /* end of [if] */
if (i1 >= WBUFSZ) {
fprintf (stderr, "exit: too many words for processing\n") ; exit(1) ;
} /* end of [if] */
return ;
} /* end of [theTextBuf_initialize] */
%}