Merge branch 'add-stmr' into 'master'

[stmr] add case study See merge request !43

Merge branch 'add-stmr' into 'master'
[stmr] add case study See merge request !43
edc3bf87 · Andre Maroneze · 0e40a316 · 9c254423 · edc3bf87 · edc3bf87
Commit edc3bf87 authored 1 year ago by Andre Maroneze
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -174,6 +174,11 @@ solitaire:
    TARGET: solitaire
  <<: *make_job

+stmr:
+  variables:
+    TARGET: stmr
+  <<: *make_job
+
 tsvc:
  variables:
    TARGET: tsvc

--- a/Makefile
+++ b/Makefile
@@ -84,6 +84,7 @@ TARGETS=\
  safestringlib \
  semver \
  solitaire \
+  stmr \
  tsvc \
  tweetnacl-usable \
  verisec \

--- a/README.md
+++ b/README.md
@@ -166,6 +166,7 @@ when available. We also summarize the license of each directory below.
 - `safestringlib`: MIT
 - `semver`: MIT
 - `solitaire`: public domain, see `solitaire.c`
+- `stmr`: MIT
 - `tsvc`: MIT, see `license.txt`
 - `tweetnacl-usable`: public domain, see `LICENSE.txt`
 - `verisec`: several, according to each app

--- a/stmr/.frama-c/GNUmakefile
+++ b/stmr/.frama-c/GNUmakefile
+# Makefile template for Frama-C/Eva case studies.
+# For details and usage information, see the Frama-C User Manual.
+
+### Prologue. Do not modify this block. #######################################
+-include path.mk
+FRAMAC ?= frama-c
+include $(shell $(FRAMAC)-config -print-lib-path)/analysis-scripts/prologue.mk
+###############################################################################
+
+# Edit below as needed. Suggested flags are optional.
+
+MACHDEP = x86_64
+FRAMAC_SHARE = $(shell $(FRAMAC)-config -print-share-path)
+
+## Preprocessing flags (for -cpp-extra-args)
+CPPFLAGS    += \
+
+## General flags
+FCFLAGS     += \
+  -add-symbolic-path=..:. \
+  -kernel-warn-key annot:missing-spec=abort \
+  -kernel-warn-key typing:implicit-function-declaration=abort \
+
+## Eva-specific flags
+EVAFLAGS    += \
+  -eva-warn-key builtins:missing-spec=abort \
+  -eva-precision 2 \
+
+## GUI-only flags
+FCGUIFLAGS += \
+
+## Analysis targets (suffixed with .eva)
+TARGETS = stmr.eva
+
+### Each target <t>.eva needs a rule <t>.parse with source files as prerequisites
+stmr.parse: \
+  ../stmr.c \
+  ../test.c \
+  $(FRAMAC_SHARE)/libc/stdio.c \
+  $(FRAMAC_SHARE)/libc/string.c \
+
+### Epilogue. Do not modify this block. #######################################
+include $(shell $(FRAMAC)-config -print-lib-path)/analysis-scripts/epilogue.mk
+###############################################################################
+
+# optional, for OSCS
+-include ../../Makefile.common
--- a/stmr/.frama-c/stmr.eva/alarms.csv
+++ b/stmr/.frama-c/stmr.eva/alarms.csv
+directory	file	line	function	property kind	status	property
+.	stmr.c	66	isConsonant	initialization	Unknown	\initialized(b + index_0)
+.	stmr.c	66	isConsonant	mem_access	Unknown	\valid_read(b + index_0)
+.	stmr.c	108	getMeasure	signed_overflow	Unknown	index_0 + 1 ≤ 2147483647
+.	stmr.c	111	getMeasure	signed_overflow	Unknown	index_0 + 1 ≤ 2147483647
+.	stmr.c	123	getMeasure	signed_overflow	Unknown	index_0 + 1 ≤ 2147483647
+.	stmr.c	126	getMeasure	signed_overflow	Unknown	index_0 + 1 ≤ 2147483647
+.	stmr.c	127	getMeasure	signed_overflow	Unknown	position + 1 ≤ 2147483647
+.	stmr.c	138	getMeasure	signed_overflow	Unknown	index_0 + 1 ≤ 2147483647
+.	stmr.c	141	getMeasure	signed_overflow	Unknown	index_0 + 1 ≤ 2147483647
+.	stmr.c	152	vowelInStem	signed_overflow	Unknown	index_0 + 1 ≤ 2147483647
+.	stmr.c	164	isDoubleConsonant	initialization	Unknown	\initialized(b + (int)(index_0 - 1))
+.	stmr.c	164	isDoubleConsonant	initialization	Unknown	\initialized(b + index_0)
+.	stmr.c	164	isDoubleConsonant	mem_access	Unknown	\valid_read(b + (int)(index_0 - 1))
+.	stmr.c	164	isDoubleConsonant	mem_access	Unknown	\valid_read(b + index_0)
+.	stmr.c	189	cvc	initialization	Unknown	\initialized(b + index_0)
+.	stmr.c	189	cvc	mem_access	Unknown	\valid_read(b + index_0)
+.	stmr.c	204	ends	initialization	Unknown	\initialized(b + k)
+.	stmr.c	204	ends	mem_access	Unknown	\valid_read(b + k)
+.	stmr.c	208	ends	signed_overflow	Unknown	(int)(k - k0) + 1 ≤ 2147483647
+.	stmr.c	227	setTo	precondition of memmove	Unknown	valid_dest: valid_or_empty(dest, n)
+.	stmr.c	229	setTo	signed_overflow	Unknown	j + length ≤ 2147483647
+.	stmr.c	266	step1ab	initialization	Unknown	\initialized(b + k)
+.	stmr.c	266	step1ab	mem_access	Unknown	\valid_read(b + k)
+.	stmr.c	271	step1ab	initialization	Unknown	\initialized(b + (int)(k - 1))
+.	stmr.c	271	step1ab	mem_access	Unknown	\valid_read(b + (int)(k - 1))
+.	stmr.c	292	step1ab	initialization	Unknown	\initialized(b + k)
+.	stmr.c	292	step1ab	mem_access	Unknown	\valid_read(b + k)
+.	stmr.c	308	step1c	mem_access	Unknown	\valid(b + k)
+.	stmr.c	318	step2	initialization	Unknown	\initialized(b + (int)(k - 1))
+.	stmr.c	318	step2	mem_access	Unknown	\valid_read(b + (int)(k - 1))
+.	stmr.c	457	step3	initialization	Unknown	\initialized(b + k)
+.	stmr.c	457	step3	mem_access	Unknown	\valid_read(b + k)
+.	stmr.c	508	step4	initialization	Unknown	\initialized(b + (int)(k - 1))
+.	stmr.c	508	step4	mem_access	Unknown	\valid_read(b + (int)(k - 1))
+.	stmr.c	566	step4	initialization	Unknown	\initialized(b + j)
+.	stmr.c	566	step4	mem_access	Unknown	\valid_read(b + j)
+.	stmr.c	628	step5	initialization	Unknown	\initialized(b + k)
+.	stmr.c	628	step5	mem_access	Unknown	\valid_read(b + k)
+.	stmr.c	636	step5	initialization	Unknown	\initialized(b + k)
+.	stmr.c	636	step5	mem_access	Unknown	\valid_read(b + k)
+.	test.c	21	assertStem	signed_overflow	Unknown	tmp_0 + 1 ≤ 2147483647
+.	test.c	21	assertStem	precondition of strlen	Unknown	valid_string_s: valid_read_string(s)
+.	test.c	21	assertStem	mem_access	Unknown	\valid(result + (int)(tmp_0 + 1))
+.	test.c	24	assertStem	signed_overflow	Unknown	errorCount + 1 ≤ 2147483647
+.	test.c	27	assertStem	precondition of fprintf_va_2	Unknown	valid_read_string(param0)
+.	test.c	27	assertStem	precondition of fprintf_va_2	Unknown	valid_read_string(param1)
+.	test.c	27	assertStem	precondition of fprintf_va_2	Unknown	valid_read_string(param2)
+.	test.c	34	assertStem	signed_overflow	Unknown	assertionCount + 1 ≤ 2147483647
+.	test.c	63	main	precondition of strlen	Unknown	valid_string_s: valid_read_string(s)
+.	test.c	63	main	mem_access	Unknown	\valid(lineIn + (size_t)(tmp - 1))
+.	test.c	64	main	precondition of strlen	Unknown	valid_string_s: valid_read_string(s)
+.	test.c	64	main	mem_access	Unknown	\valid(lineOut + (size_t)(tmp_0 - 1))
+FRAMAC_SHARE/libc	stdio.c	90	getline	mem_access	Unknown	\valid(*lineptr + tmp_2)
+FRAMAC_SHARE/libc	stdio.c	93	getline	mem_access	Unknown	\valid(*lineptr + cur)
+FRAMAC_SHARE/libc	stdio.h	207	fprintf_va_2	precondition	Unknown	valid_read_string(param0)
+FRAMAC_SHARE/libc	stdio.h	207	fprintf_va_2	precondition	Unknown	valid_read_string(param1)
+FRAMAC_SHARE/libc	stdio.h	207	fprintf_va_2	precondition	Unknown	valid_read_string(param2)
+FRAMAC_SHARE/libc	string.c	146	strcmp	initialization	Unknown	\initialized(s1 + i)
+FRAMAC_SHARE/libc	string.c	146	strcmp	initialization	Unknown	\initialized(s2 + i)
+FRAMAC_SHARE/libc	string.c	146	strcmp	mem_access	Unknown	\valid_read(s1 + i)
+FRAMAC_SHARE/libc	string.c	146	strcmp	mem_access	Unknown	\valid_read(s2 + i)
+FRAMAC_SHARE/libc	string.c	149	strcmp	initialization	Unknown	\initialized((unsigned char *)s1 + i)
+FRAMAC_SHARE/libc	string.c	149	strcmp	initialization	Unknown	\initialized((unsigned char *)s2 + i)
+FRAMAC_SHARE/libc	string.c	149	strcmp	mem_access	Unknown	\valid_read((unsigned char *)s1 + i)
+FRAMAC_SHARE/libc	string.c	149	strcmp	mem_access	Unknown	\valid_read((unsigned char *)s2 + i)
+FRAMAC_SHARE/libc	string.c	169	memcmp	initialization	Unknown	\initialized(p1 + i)
+FRAMAC_SHARE/libc	string.c	169	memcmp	mem_access	Unknown	\valid_read(p1 + i)
+FRAMAC_SHARE/libc	string.c	318	strdup	precondition of strlen	Unknown	valid_string_s: valid_read_string(s)
+FRAMAC_SHARE/libc	string.c	324	strdup	precondition of memcpy	Unknown	valid_dest: valid_or_empty(dest, n)
+FRAMAC_SHARE/libc	string.c	324	strdup	precondition of memcpy	Unknown	valid_src: valid_read_or_empty(src, n)
+FRAMAC_SHARE/libc	string.h	95	memcpy	precondition	Unknown	valid_dest: valid_or_empty(dest, n)
+FRAMAC_SHARE/libc	string.h	96	memcpy	precondition	Unknown	valid_src: valid_read_or_empty(src, n)
+FRAMAC_SHARE/libc	string.h	120	memmove	precondition	Unknown	valid_dest: valid_or_empty(dest, n)
+FRAMAC_SHARE/libc	string.h	141	strlen	precondition	Unknown	valid_string_s: valid_read_string(s)
--- a/stmr/.frama-c/stmr.eva/metrics.log
+++ b/stmr/.frama-c/stmr.eva/metrics.log
+[metrics] Eva coverage statistics
+=======================
+Syntactically reachable functions = 17 (out of 17)
+Semantically reached functions = 17
+Coverage estimation = 100.0%
+[metrics] References to non-analyzed functions
+------------------------------------
+[metrics] Statements analyzed by Eva
+--------------------------
+500 stmts in analyzed functions, 497 stmts analyzed (99.4%)
+assertStem: 23 stmts out of 23 (100.0%)
+cvc: 26 stmts out of 26 (100.0%)
+ends: 17 stmts out of 17 (100.0%)
+getMeasure: 39 stmts out of 39 (100.0%)
+isDoubleConsonant: 7 stmts out of 7 (100.0%)
+main: 36 stmts out of 36 (100.0%)
+replace: 4 stmts out of 4 (100.0%)
+setTo: 4 stmts out of 4 (100.0%)
+stem: 16 stmts out of 16 (100.0%)
+step1ab: 48 stmts out of 48 (100.0%)
+step1c: 6 stmts out of 6 (100.0%)
+step2: 93 stmts out of 93 (100.0%)
+step3: 34 stmts out of 34 (100.0%)
+step5: 17 stmts out of 17 (100.0%)
+vowelInStem: 15 stmts out of 15 (100.0%)
+step4: 93 stmts out of 94 (98.9%)
+isConsonant: 19 stmts out of 21 (90.5%)
--- a/stmr/.frama-c/stmr.eva/nonterm.log
+++ b/stmr/.frama-c/stmr.eva/nonterm.log
--- a/stmr/.frama-c/stmr.eva/warnings.log
+++ b/stmr/.frama-c/stmr.eva/warnings.log
+stmr.c:74:[eva] warning: Using specification of function isConsonant for recursive calls.
+Analysis of function isConsonant is thus incomplete and its soundness
+relies on the written specification.
--- a/stmr/.frama-c/stmr.parse/framac.ast
+++ b/stmr/.frama-c/stmr.parse/framac.ast
--- a/stmr/.frama-c/stmr.parse/metrics.log
+++ b/stmr/.frama-c/stmr.parse/metrics.log
+[metrics] Defined functions (17)
+======================
+ assertStem (3 calls); cvc (2 calls); ends (56 calls); getMeasure (6 calls);
+ isConsonant (9 calls); isDoubleConsonant (2 calls); main (0 call);
+ replace (28 calls); setTo (6 calls); stem (1 call); step1ab (1 call);
+ step1c (1 call); step2 (1 call); step3 (1 call); step4 (1 call);
+ step5 (1 call); vowelInStem (2 calls); 
+
+Specified-only functions (0)
+============================
+ 
+
+Undefined and unspecified functions (0)
+=======================================
+ 
+
+'Extern' global variables (0)
+=============================
+ 
+
+Potential entry points (1)
+==========================
+ main; 
+
+Global metrics
+============== 
+Sloc = 500
+Decision point = 132
+Global variables = 6
+If = 106
+Loop = 6
+Goto = 37
+Assignment = 154
+Exit point = 17
+Function = 17
+Function call = 152
+Pointer dereferencing = 22
+Cyclomatic complexity = 149
--- a/stmr/.frama-c/stmr.parse/warnings.log
+++ b/stmr/.frama-c/stmr.parse/warnings.log
--- a/stmr/Makefile
+++ b/stmr/Makefile
+TEST = test.c stmr.c
+OBJ_TEST = $(TEST:.c=.o)
+
+CFLAGS = -D_GNU_SOURCE -std=c99
+
+LFLAGS = -Wall -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes \
+	-Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \
+	-Wshadow -Wcast-align -Wbad-function-cast -Wchar-subscripts -Winline \
+	-Wnested-externs -Wredundant-decls
+
+COVFLAGS = -Wall -fprofile-arcs -ftest-coverage
+
+test: $(OBJ_TEST)
+	$(CC) $(OBJ_TEST) -o $@
+
+coverage: $(OBJ_TEST)
+	gcc $(COVFLAGS) $(TEST) -o $@
+
+.SUFFIXES: .c .o
+.c.o:
+	$(CC) $< $(CFLAGS) $(LFLAGS) -c -o $@
+
+run-coverage: coverage
+	./coverage && gcov stmr
+
+run-test: test
+	./test
+
+# Remove build products and gcov data files. The gcov patterns are spelled
+# out one by one because recipes run under /bin/sh by default, which is not
+# required to support `{a,b}` brace expansion.
+clean:
+	rm -f coverage test $(OBJ_TEST) *.gcov *.gcda *.gcno
+
+.PHONY: clean run-coverage run-test
--- a/stmr/OSCS-README.txt
+++ b/stmr/OSCS-README.txt
+Martin Porter’s Stemming algorithm as a C library
+https://github.com/wooorm/stmr.c
+HAS_DYN_ALLOC, HAS_RECURSION, NO_FLOAT
--- a/stmr/fixture/input.txt
+++ b/stmr/fixture/input.txt
--- a/stmr/fixture/output.txt
+++ b/stmr/fixture/output.txt
--- a/stmr/funding.yml
+++ b/stmr/funding.yml
+github: wooorm
--- a/stmr/license
+++ b/stmr/license
+(The MIT License)
+
+Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/stmr/package.json
+++ b/stmr/package.json
+{
+  "name": "stmr.c",
+  "version": "1.0.0",
+  "description": "The Porter Stemmer algorithm",
+  "license": "MIT",
+  "keywords": [
+    "martin",
+    "porter",
+    "stemmer",
+    "algorithm"
+  ],
+  "repo": "wooorm/stmr.c",
+  "src": [
+    "stmr.h",
+    "stmr.c"
+  ]
+}
--- a/stmr/readme.md
+++ b/stmr/readme.md
+# stmr(3) [![Build Status][travis-badge]][travis] [![Coverage Status][coveralls-badge]][coveralls]
+
+Martin Porter’s [Stemming algorithm][algo] as a C library.
+There’s also a CLI: [stmr(1)][cli].
+
+## Installation
+
+[clib][]:
+
+```bash
+clib install wooorm/stmr.c
+```
+
+Or clone the repo.
+
+## Usage
+
+### `int stem(char *pointer, int start, int end)`
+
+```c
+#include <stdio.h>
+#include <string.h>
+#include "stmr.h"
+
+int
+main(int argc, char **argv) {
+  char *word = argv[1];
+
+  int end = stem(word, 0, strlen(word) - 1);
+
+  word[end + 1] = 0;
+
+  printf("%s", word);
+}
+```
+
+## Related
+
+*   [`stemmer`][lib] — Same algorithm in JavaScript
+*   [`stmr`][cli]
+    — CLI in C
+
+## License
+
+[MIT][license] © [Titus Wormer][author]
+
+<!-- Definitions -->
+
+[travis-badge]: https://img.shields.io/travis/wooorm/stmr.c.svg
+
+[travis]: https://travis-ci.org/wooorm/stmr.c
+
+[coveralls-badge]: https://img.shields.io/coveralls/wooorm/stmr.c.svg
+
+[coveralls]: https://coveralls.io/github/wooorm/stmr.c
+
+[license]: license
+
+[author]: http://wooorm.com
+
+[algo]: http://tartarus.org/martin/PorterStemmer/
+
+[cli]: https://github.com/wooorm/stmr
+
+[lib]: https://github.com/words/stemmer
+
+[clib]: https://github.com/clibs/clib
--- a/stmr/stmr.c
+++ b/stmr/stmr.c
+/* This is the Porter stemming algorithm, coded up in ANSI C by the
+ * author. It may be regarded as canonical, in that it follows the
+ * algorithm presented in
+ *
+ * Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+ * no. 3, pp 130-137,
+ *
+ * only differing from it at the points marked --DEPARTURE-- below.
+ *
+ * See also http://www.tartarus.org/~martin/PorterStemmer
+ *
+ * The algorithm as described in the paper could be exactly replicated
+ * by adjusting the points of DEPARTURE, but this is barely necessary,
+ * because (a) the points of DEPARTURE are definitely improvements, and
+ * (b) no encoding of the Porter stemmer I have seen is anything like
+ * as exact as this version, even with the points of DEPARTURE!
+ *
+ * You can compile it on Unix with 'gcc -O3 -o stem stem.c' after which
+ * 'stem' takes a list of inputs and sends the stemmed equivalent to
+ * stdout.
+ *
+ * The algorithm as encoded here is particularly fast.
+ *
+ * Release 1: was many years ago
+ * Release 2: 11 Apr 2013
+ *     fixes a bug noted by Matt Patenaude <matt@mattpatenaude.com>,
+ *
+ *     case 'o': if (ends("\03" "ion") && (b[j] == 's' || b[j] == 't')) break;
+ *         ==>
+ *     case 'o': if (ends("\03" "ion") && j >= k0 && (b[j] == 's' || b[j] == 't')) break;
+ *
+ *     to avoid accessing b[k0-1] when the word in b is "ion".
+ * Release 3: 25 Mar 2014
+ *     fixes a similar bug noted by Klemens Baum <klemensbaum@gmail.com>,
+ *     that if step1ab leaves a one letter result (ied -> i, aing -> a etc),
+ *     step2 and step4 access the byte before the first letter. So we skip
+ *     steps after step1ab unless k > k0. */
+#include <string.h>
+#include "stmr.h"
+
+/* The main part of the stemming algorithm starts here. b is a buffer
+ * holding a word to be stemmed. The letters are in b[k0], b[k0+1] ...
+ * ending at b[k]. In fact k0 = 0 in this demo program. k is readjusted
+ * downwards as the stemming progresses. Zero termination is not in fact
+ * used in the algorithm.
+ *
+ * Note that only lower case sequences are stemmed. Forcing to lower case
+ * should be done before stem(...) is called. */
+
+/* Buffer holding the word to be stemmed; `stem()` points it at the caller's string. */
+static char *b;
+
+static int k;  /* offset of the last letter of the word; lowered as suffixes are removed */
+static int k0; /* offset of the first letter of the word */
+
+/* j is a general offset into the string; a successful `ends()` sets it to the last offset of the stem */
+static int j;
+
+/**
+ * TRUE when `b[index]` is a consonant.
+ */
+
+/*@ assigns \result \from indirect:b[index], indirect:index; */
+static int
+isConsonant(int index) {
+  const char letter = b[index];
+
+  /* The five plain vowels are never consonants. */
+  if (letter == 'a' || letter == 'e' || letter == 'i' || letter == 'o' || letter == 'u') {
+    return FALSE;
+  }
+
+  /* Everything except `y` is a consonant. */
+  if (letter != 'y') {
+    return TRUE;
+  }
+
+  /* `y` is a consonant at the start of the word; elsewhere it is a
+   * consonant only when the preceding letter is not one. */
+  if (index == k0) {
+    return TRUE;
+  }
+
+  return !isConsonant(index - 1);
+}
+
+/* Count the vowel-run/consonant-run pairs (VC) found in `b[k0] ... b[j]`.
+ * With C a run of consonants, V a run of vowels, and <..> marking an
+ * optional part:
+ *
+ *   <C><V>       gives 0
+ *   <C>VC<V>     gives 1
+ *   <C>VCVC<V>   gives 2
+ *   <C>VCVCVC<V> gives 3
+ *   ....
+ */
+static int
+getMeasure() {
+  int pairs = 0;
+  int cursor = k0;
+
+  /* Skip the optional leading consonant run. */
+  for (;; cursor++) {
+    if (cursor > j) {
+      return pairs;
+    }
+
+    if (!isConsonant(cursor)) {
+      break;
+    }
+  }
+
+  cursor++;
+
+  for (;;) {
+    /* Walk to the end of the current vowel run. */
+    for (;; cursor++) {
+      if (cursor > j) {
+        return pairs;
+      }
+
+      if (isConsonant(cursor)) {
+        break;
+      }
+    }
+
+    /* A consonant followed a vowel run: one more VC pair. */
+    cursor++;
+    pairs++;
+
+    /* Walk to the end of the current consonant run. */
+    for (;; cursor++) {
+      if (cursor > j) {
+        return pairs;
+      }
+
+      if (!isConsonant(cursor)) {
+        break;
+      }
+    }
+
+    cursor++;
+  }
+}
+
+/* `TRUE` when at least one of `b[k0] ... b[j]` is a vowel. */
+static int
+vowelInStem() {
+  int cursor;
+
+  for (cursor = k0; cursor <= j; cursor++) {
+    if (!isConsonant(cursor)) {
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+/* `TRUE` when `b[index]` and `b[index - 1]` are the same consonant.
+ *
+ * There is no preceding letter unless `index` is at least `k0 + 1`; for
+ * smaller values the result is `FALSE` and the buffer is not read, so the
+ * byte before the word (`b[k0 - 1]`) is never accessed. This matters when
+ * `step1ab()` has reduced the word to a single letter. */
+static int
+isDoubleConsonant(int index) {
+  /* No letter before the first one: cannot be a doubled consonant. */
+  if (index < k0 + 1) {
+    return FALSE;
+  }
+
+  if (b[index] != b[index - 1]) {
+    return FALSE;
+  }
+
+  return isConsonant(index);
+}
+
+/* `TRUE` when `b[index - 2]`, `b[index - 1]`, `b[index]` have the form
+ * consonant - vowel - consonant and the final consonant is not `w`, `x`
+ * or `y`. Used when deciding whether to restore an `e` at the end of a
+ * short word.
+ *
+ * Such as:
+ *
+ * `cav(e)`, `lov(e)`, `hop(e)`, `crim(e)`, but `snow`,
+ * `box`, `tray`.
+ */
+static int
+cvc(int index) {
+  /* Fewer than three letters available. */
+  if (index < k0 + 2) {
+    return FALSE;
+  }
+
+  if (!isConsonant(index)) {
+    return FALSE;
+  }
+
+  if (isConsonant(index - 1)) {
+    return FALSE;
+  }
+
+  if (!isConsonant(index - 2)) {
+    return FALSE;
+  }
+
+  switch (b[index]) {
+    case 'w':
+    case 'x':
+    case 'y':
+      return FALSE;
+    default:
+      return TRUE;
+  }
+}
+
+/* `ends(value)` is `TRUE` when `b[k0] ... b[k]` ends with the suffix in
+ * `value`. `value[0]` holds the suffix length and the suffix characters
+ * follow it. On success `j` is set to the offset just before the suffix;
+ * on failure `j` is left untouched. */
+static int
+ends(const char *value) {
+  const int suffixLength = value[0];
+
+  /* Checked in this order: last character (cheap rejection), then the
+   * word being long enough, then the full comparison. */
+  if (value[suffixLength] != b[k]
+      || suffixLength > k - k0 + 1
+      || memcmp(b + k - suffixLength + 1, value + 1, suffixLength) != 0) {
+    return FALSE;
+  }
+
+  j = k - suffixLength;
+
+  return TRUE;
+}
+
+/* `setTo(value)` overwrites the buffer from `b[j + 1]` onwards with the
+ * characters of `value` (length in `value[0]`, characters after it) and
+ * moves `k` to the last character written. */
+static void
+setTo(const char *value) {
+  const int count = value[0];
+  char *destination = b + j + 1;
+
+  memmove(destination, value + 1, count);
+
+  k = j + count;
+}
+
+/* Apply `setTo(value)`, but only when the stem `b[k0] ... b[j]` has a
+ * measure greater than zero. */
+static void
+replace(const char *value) {
+  if (getMeasure() <= 0) {
+    return;
+  }
+
+  setTo(value);
+}
+
+/* `step1ab()` strips plurals and the `-ed` / `-ing` endings.
+ *
+ * Such as:
+ *
+ *   caresses  ->  caress
+ *   ponies    ->  poni
+ *   ties      ->  ti
+ *   caress    ->  caress
+ *   cats      ->  cat
+ *
+ *   feed      ->  feed
+ *   agreed    ->  agree
+ *   disabled  ->  disable
+ *
+ *   matting   ->  mat
+ *   mating    ->  mate
+ *   meeting   ->  meet
+ *   milling   ->  mill
+ *   messing   ->  mess
+ *
+ *   meetings  ->  meet
+ */
+static void
+step1ab() {
+  /* Plural endings. */
+  if (b[k] == 's') {
+    if (ends("\04" "sses")) {
+      k = k - 2;
+    } else if (ends("\03" "ies")) {
+      setTo("\01" "i");
+    } else if (b[k - 1] != 's') {
+      k = k - 1;
+    }
+  }
+
+  /* `-eed` only loses its final letter, and only for a non-zero measure. */
+  if (ends("\03" "eed")) {
+    if (getMeasure() > 0) {
+      k = k - 1;
+    }
+
+    return;
+  }
+
+  /* `-ed` / `-ing` are removed only when the remaining stem has a vowel. */
+  if (!ends("\02" "ed") && !ends("\03" "ing")) {
+    return;
+  }
+
+  if (!vowelInStem()) {
+    return;
+  }
+
+  k = j;
+
+  /* Tidy up the stem left behind. */
+  if (ends("\02" "at")) {
+    setTo("\03" "ate");
+    return;
+  }
+
+  if (ends("\02" "bl")) {
+    setTo("\03" "ble");
+    return;
+  }
+
+  if (ends("\02" "iz")) {
+    setTo("\03" "ize");
+    return;
+  }
+
+  if (isDoubleConsonant(k)) {
+    /* Drop one of the doubled letters unless the pair is `ll`, `ss` or `zz`. */
+    const int doubled = b[k - 1];
+
+    if (doubled != 'l' && doubled != 's' && doubled != 'z') {
+      k = k - 1;
+    }
+
+    return;
+  }
+
+  if (getMeasure() == 1 && cvc(k)) {
+    setTo("\01" "e");
+  }
+}
+
+/* `step1c()` rewrites a terminal `y` as `i` when the stem before it
+ * contains a vowel. */
+static void
+step1c() {
+  if (!ends("\01" "y")) {
+    return;
+  }
+
+  if (vowelInStem()) {
+    b[k] = 'i';
+  }
+}
+
+/* `step2()` maps double suffixes to single ones, so -ization
+ * ( = -ize plus -ation) maps to -ize etc. The replacement only happens
+ * when the string before the suffix gives getMeasure() > 0.
+ *
+ * Rules are grouped by the penultimate letter of the word. Within a
+ * group they are tried in table order, and the first suffix that
+ * matches ends the step whether or not the replacement was applied. */
+static void
+step2() {
+  static const struct {
+    char key;                /* penultimate letter selecting the rule group */
+    const char *suffix;      /* length-prefixed suffix to look for */
+    const char *replacement; /* length-prefixed replacement */
+  } rules[] = {
+    {'a', "\07" "ational", "\03" "ate"},
+    {'a', "\06" "tional", "\04" "tion"},
+    {'c', "\04" "enci", "\04" "ence"},
+    {'c', "\04" "anci", "\04" "ance"},
+    {'e', "\04" "izer", "\03" "ize"},
+    /* --DEPARTURE--: To match the published algorithm, replace the
+     * next rule with: "\04" "abli" -> "\04" "able". */
+    {'l', "\03" "bli", "\03" "ble"},
+    {'l', "\04" "alli", "\02" "al"},
+    {'l', "\05" "entli", "\03" "ent"},
+    {'l', "\03" "eli", "\01" "e"},
+    {'l', "\05" "ousli", "\03" "ous"},
+    {'o', "\07" "ization", "\03" "ize"},
+    {'o', "\05" "ation", "\03" "ate"},
+    {'o', "\04" "ator", "\03" "ate"},
+    {'s', "\05" "alism", "\02" "al"},
+    {'s', "\07" "iveness", "\03" "ive"},
+    {'s', "\07" "fulness", "\03" "ful"},
+    {'s', "\07" "ousness", "\03" "ous"},
+    {'t', "\05" "aliti", "\02" "al"},
+    {'t', "\05" "iviti", "\03" "ive"},
+    {'t', "\06" "biliti", "\03" "ble"},
+    /* --DEPARTURE--: To match the published algorithm, delete the
+     * next rule. */
+    {'g', "\04" "logi", "\03" "log"}
+  };
+  const unsigned total = (unsigned)(sizeof rules / sizeof rules[0]);
+  const char selector = b[k - 1];
+  unsigned slot;
+
+  for (slot = 0; slot < total; slot++) {
+    if (rules[slot].key != selector) {
+      continue;
+    }
+
+    if (ends(rules[slot].suffix)) {
+      replace(rules[slot].replacement);
+      return;
+    }
+  }
+}
+
+/* `step3()` deals with -ic-, -full, -ness etc., using the same strategy
+ * as step2 except that rules are grouped by the last letter of the
+ * word. The first matching suffix in a group ends the step. */
+static void
+step3() {
+  static const struct {
+    char key;                /* last letter selecting the rule group */
+    const char *suffix;      /* length-prefixed suffix to look for */
+    const char *replacement; /* length-prefixed replacement (may be empty) */
+  } rules[] = {
+    {'e', "\05" "icate", "\02" "ic"},
+    {'e', "\05" "ative", "\00" ""},
+    {'e', "\05" "alize", "\02" "al"},
+    {'i', "\05" "iciti", "\02" "ic"},
+    {'l', "\04" "ical", "\02" "ic"},
+    {'l', "\03" "ful", "\00" ""},
+    {'s', "\04" "ness", "\00" ""}
+  };
+  const unsigned total = (unsigned)(sizeof rules / sizeof rules[0]);
+  const char selector = b[k];
+  unsigned slot;
+
+  for (slot = 0; slot < total; slot++) {
+    if (rules[slot].key != selector) {
+      continue;
+    }
+
+    if (ends(rules[slot].suffix)) {
+      replace(rules[slot].replacement);
+      return;
+    }
+  }
+}
+
+/* `step4()` takes off -ant, -ence etc., in context <c>vcvc<v>: the
+ * suffix is removed only when the stem before it has a measure greater
+ * than one.
+ *
+ * Rules are grouped by the penultimate letter of the word and tried in
+ * table order. The `-ion` rule additionally requires a non-empty stem
+ * ending in `s` or `t`; when that extra condition fails, the remaining
+ * rules of the group are still tried. */
+static void
+step4() {
+  static const struct {
+    char key;           /* penultimate letter selecting the rule group */
+    const char *suffix; /* length-prefixed suffix to look for */
+    int needsSOrT;      /* non-zero: stem must end in `s` or `t` */
+  } rules[] = {
+    {'a', "\02" "al", 0},
+    {'c', "\04" "ance", 0},
+    {'c', "\04" "ence", 0},
+    {'e', "\02" "er", 0},
+    {'i', "\02" "ic", 0},
+    {'l', "\04" "able", 0},
+    {'l', "\04" "ible", 0},
+    {'n', "\03" "ant", 0},
+    {'n', "\05" "ement", 0},
+    {'n', "\04" "ment", 0},
+    {'n', "\03" "ent", 0},
+    {'o', "\03" "ion", 1},
+    /* takes care of -ous */
+    {'o', "\02" "ou", 0},
+    {'s', "\03" "ism", 0},
+    {'t', "\03" "ate", 0},
+    {'t', "\03" "iti", 0},
+    {'u', "\03" "ous", 0},
+    {'v', "\03" "ive", 0},
+    {'z', "\03" "ize", 0}
+  };
+  const unsigned total = (unsigned)(sizeof rules / sizeof rules[0]);
+  const char selector = b[k - 1];
+  unsigned slot;
+  int matched = 0;
+
+  for (slot = 0; slot < total && !matched; slot++) {
+    if (rules[slot].key != selector) {
+      continue;
+    }
+
+    if (!ends(rules[slot].suffix)) {
+      continue;
+    }
+
+    /* `j >= k0` keeps `b[j]` inside the word when the word is just "ion". */
+    matched = !rules[slot].needsSOrT || (j >= k0 && (b[j] == 's' || b[j] == 't'));
+  }
+
+  if (matched && getMeasure() > 1) {
+    k = j;
+  }
+}
+
+/* `step5()` removes a final `-e` when `getMeasure()` is greater than `1`,
+ * or when it equals `1` and the letters before the `e` do not form a
+ * `cvc()` pattern. It then changes `-ll` to `-l` when `getMeasure()` is
+ * greater than `1`. */
+static void
+step5() {
+  /* The whole word is the stem for the measures taken below. */
+  j = k;
+
+  if (b[k] == 'e') {
+    const int measure = getMeasure();
+
+    if (measure > 1) {
+      k = k - 1;
+    } else if (measure == 1 && !cvc(k - 1)) {
+      k = k - 1;
+    }
+  }
+
+  if (b[k] != 'l') {
+    return;
+  }
+
+  if (!isDoubleConsonant(k)) {
+    return;
+  }
+
+  if (getMeasure() > 1) {
+    k = k - 1;
+  }
+}
+
+/* In `stem(p, index, position)`, `p` is a `char` pointer, and the
+ * string to be stemmed runs from `p[index]` to `p[position]`
+ * (inclusive).
+ *
+ * Typically, `index` is zero and `position` is the offset to the
+ * last character of a string, `(p[position + 1] == '\0')`.
+ * The stemmer adjusts the characters `p[index]` ... `p[position]`
+ * in place and returns the new end-point of the string, `k`.
+ *
+ * Stemming never increases word length, so `index <= k <= position`.
+ *
+ * To turn the stemmer into a module, declare 'stem' as
+ * extern, and delete the remainder of this file. */
+int
+stem(char *p, int index, int position) {
+  /* Copy the parameters into statics. */
+  b = p;
+  k0 = index;
+  k = position;
+
+  /* --DEPARTURE--: with this guard, strings of length 1 or 2 don't
+   * go through the stemming process, although no mention is made of
+   * this in the published algorithm. Remove the guard to match the
+   * published algorithm. */
+  if (k > k0 + 1) {
+    step1ab();
+
+    /* The later steps look at the letter before the last one, so they
+     * are skipped when step1ab leaves a one-letter result. */
+    if (k > k0) {
+      step1c();
+      step2();
+      step3();
+      step4();
+      step5();
+    }
+  }
+
+  return k;
+}