1.打开文件
1 ????FILE *fp; ??????????????????????????//文件指针2 ????fp = fopen("D:\\test.txt", "r"); ???//只读方式打开文件3 ????if (fp == NULL)4 ????{5 ????????printf("文件打开失败");6 ????????exit(0)
2.读取文本
1 ?????char buf[1000] = { 0 }; ????????????//buffer预清空,否则结尾有出乱码的可能2 ?????fread(buf, sizeof(buf), 1, fp); ????3 ?????printf("%s\n", buf); ?
3.结构体定义
/*
 * Node of the binary tree used for counting words.
 *
 * `str` is a pointer to the word: a single `char` cannot hold a word, and
 * insert() already reads this field through a `char *`.
 */
typedef struct BinaryTree {
    char *str;                     /* the word, as a NUL-terminated string */
    int count;                     /* number of occurrences of the word */
    struct BinaryTree *lchild;     /* left child */
    struct BinaryTree *rchild;     /* right child */
} BTNode;
4.给BTNode分配内存
1 ????BTNode* talloc(void)2 ????{3 ????????return (BTNode*)malloc(sizeof(BTNode));4 ????}
5.比较两个字符串(单词)大小
/*
 * Compare two NUL-terminated strings (words) byte by byte.
 *
 * Returns 0 when the strings are equal, -1 when s1 sorts before s2 and
 * 1 when s1 sorts after s2. Bytes are compared as unsigned char.
 *
 * NOTE(review): `strcmp` is a name reserved by the C standard library.
 * The parameters are const-qualified so the signature is identical to the
 * <string.h> prototype; consider renaming this helper.
 */
int strcmp(const char *s1, const char *s2)
{
    while (*s1 == *s2) {
        if (*s1 == '\0')
            return 0;              /* reached the end of both strings: equal */
        s1++;
        s2++;
    }
    /* First differing byte decides the order. */
    return ((unsigned char)*s1 < (unsigned char)*s2) ? -1 : 1;
}
6.构建单词二叉树
1 void insert(BTNode ** tree, char * count) { 2 ? ??BTNode * temp = NULL; 3 ?? ?if (!(*tree)) { 4 ??? ????temp = (BTNode*)malloc(sizeof(BTNode)); 5 ???? ???temp->lchild = temp->rchild = NULL; 6 ???????? temp->count = 1; 7 ??????? *tree = temp; 8 ??????? return; 9 ?? ?}10 11 ??? if (cmp(count, (char *)(*tree)->str)) {12 ??? ????insert(&(*tree)->lchild, count);13 ??? }14 ???? else if (cmp(count, (char *)(*tree)->str)) {15 ??? ????insert(&(*tree)->rchild, count);16 ??? }17 ???? else {18 ????? ??(*tree)->count++;19 ? ??}20 }
总结
对于统计单词出现的次数,我的想法是把文本内容看成一个个字符串,通过读取字符串,建立一个二叉树,建立二叉树的过程就是在进行单词频率统计工作,最后遍历一次二叉树就可以得到文本出现的所有单词,把单词和词频放入二维数组中。但是我的代码最终没能运行成功,没有实现作业要求的功能。
git: https://coding.net/u/a284617374/p/123/git/blob/master/fail.cpp
要改进
http://www.cnblogs.com/120626fj/p/7533435.html
原文地址:http://www.cnblogs.com/120626fj/p/7533435.html