aboutsummaryrefslogtreecommitdiff
path: root/clang/sec6/opg3.c
diff options
context:
space:
mode:
Diffstat (limited to 'clang/sec6/opg3.c')
-rw-r--r--clang/sec6/opg3.c171
1 files changed, 171 insertions, 0 deletions
diff --git a/clang/sec6/opg3.c b/clang/sec6/opg3.c
new file mode 100644
index 0000000..db2b1fd
--- /dev/null
+++ b/clang/sec6/opg3.c
@@ -0,0 +1,171 @@
+/* Exercise 6-3
+ * Write a cross-referencer that prints a list of all words in a document, and,
+ * for each word, a list of the line numbers on which it occurs.
+ * Remove noise words like "the," "and," and so on.
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define MAXWORDSIZE 100
+#define BLACKWORDS (sizeof(blacklist) / sizeof(blacklist[0]))
+
+// Sorted word blacklist
+char *blacklist[] = {"a", "and", "of", "the"};
+
+bool checkchar(char c) {
+ //return isalpha(c) || c == '\'';
+ return isalpha(c);
+}
+
+int getword(char *dest, size_t max) {
+ char c;
+ char *w = dest;
+
+ while (!checkchar(c = getchar()) && c != EOF) {
+ if (c == '\n') {
+ goto exit;
+ }
+ }
+ if (c == EOF) {
+ goto exit;
+ }
+
+ *w++ = tolower(c);
+ for ( ; --max; w++) {
+ if (!checkchar(c = getchar())) {
+ break;
+ }
+ *w = tolower(c);
+ }
+
+exit:
+ *w = '\0';
+ return c;
+}
+
+bool checkword(char *w) {
+ int l = 0;
+ int r = BLACKWORDS-1;
+
+ while (l <= r) {
+ int m = (l+r)/2;
+ int compare = strcmp(w, blacklist[m]);
+ if (compare == 0) {
+ return true;
+ }
+ if (compare < 0) {
+ r = m-1;
+ } else {
+ l = m+1;
+ }
+ }
+
+ return false;
+}
+
+// Tree stuff
+typedef struct occur_list {
+ unsigned line;
+ struct occur_list *next;
+} occur_t;
+
+typedef struct tree_node {
+ char word[MAXWORDSIZE];
+ occur_t *occurs;
+
+ struct tree_node *left;
+ struct tree_node *right;
+} treenode_t;
+
+occur_t *occur_insert(occur_t *l, unsigned line) {
+ occur_t *new = (occur_t *) malloc(sizeof(occur_t));
+
+ new->next = l ? l->next : new;
+ new->line = line;
+ if (l) {
+ l->next = new;
+ }
+
+ return new;
+}
+
+void occur_print(occur_t *l) {
+ if (!l) {
+ return;
+ }
+ occur_t *n = l->next;
+ for ( ; n; n = n->next) {
+ printf("%d ", n->line);
+ if (n == l) {
+ break;
+ }
+ }
+}
+
+treenode_t *tree_insert(treenode_t *t, char *word, unsigned line) {
+ if (!t) {
+ treenode_t *node = (treenode_t *) malloc(sizeof(treenode_t));
+
+ strcpy(node->word, word);
+ node->left = node->right = NULL;
+ node->occurs = occur_insert(node->occurs, line);
+
+ return node;
+ }
+
+ int compare = strcmp(t->word, word);
+ if (compare == 0) {
+ t->occurs = occur_insert(t->occurs, line);
+ return t;
+ }
+
+ treenode_t **branch = compare > 0 ? &t->left : &t->right;
+ *branch = tree_insert(*branch, word, line);
+
+ return t;
+}
+
+void tree_print_words(treenode_t *t, unsigned longest) {
+ if (!t) {
+ return;
+ }
+ tree_print_words(t->left, longest);
+
+ printf("%*.*s: ", -longest, longest, t->word);
+ occur_print(t->occurs);
+ putchar('\n');
+
+ tree_print_words(t->right, longest);
+}
+
+int main() {
+ char word[MAXWORDSIZE];
+ char c;
+ treenode_t *root = NULL;
+ unsigned line = 1;
+ size_t longest = 0;
+
+ unsigned i = 0;
+ while ((c = getword(word, MAXWORDSIZE)) != EOF) {
+ if (word[0] == '\0' || checkword(word)) {
+ goto next_word;
+ }
+
+ size_t wlen = strlen(word);
+ if (wlen > longest) {
+ longest = wlen;
+ }
+
+ root = tree_insert(root, word, line);
+
+ // This goto can be replaced by a for loop
+next_word:
+ if (c == '\n') { line++; }
+ }
+
+ tree_print_words(root, longest);
+}