/***************************************************************************
* Copyright (C) 2005 by TAM(Teppei Tamra) *
* tam-t@par.odn.ne.jp *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include "wordsdic.h"
using namespace Honoka;
/*!
    Opens \a filename read-only and maps its entire content into memory
    (PROT_READ, MAP_PRIVATE).

    On success fd holds the open descriptor, mmapptr the start of the
    mapping and mmapsize the file length. If open(), lseek() or mmap()
    fails, the object is left in the "nothing mapped" state: fd == -1,
    mmapptr == 0 and mmapsize == 0.

    \param filename path of the dictionary file to map.
 */
WordsDic::WordsDic(const string &filename)
{
    // Start from a fully defined "nothing mapped" state so that every early
    // return below leaves all three members initialised.
    fd = -1;
    mmapptr = 0;
    mmapsize = 0;

    const int handle = open(filename.c_str(),O_RDONLY);
    if (handle == -1) return;

    // The file length is taken from the offset reached by seeking to the end.
    const off_t length = lseek(handle,0,SEEK_END);
    if (length == -1) {
        close(handle);
        return;
    }

    void *ptr = mmap(0,length,PROT_READ,MAP_PRIVATE,handle,0);
    if (ptr == MAP_FAILED) {
        close(handle);
        return;
    }

    // Publish the members only once everything has succeeded.
    fd = handle;
    mmapptr = static_cast<char *>(ptr);
    mmapsize = length;
}
/*!
    Uses a memory region supplied by the caller as the dictionary storage.

    No file descriptor is associated with the object (fd is set to -1), so
    the destructor neither unmaps the region nor closes anything.

    \param ptr  start of the region holding the dictionary data.
    \param size length of the region in bytes.
 */
WordsDic::WordsDic(char *ptr,off_t size)
{
    mmapsize = size;
    mmapptr = ptr;
    // -1 marks "no file behind this region".
    fd = -1;
}
/*!
    Releases the file mapping and closes the descriptor when the object was
    built from a file. Nothing is done when fd is -1.
 */
WordsDic::~WordsDic()
{
    // fd == -1: either the file could not be mapped or the storage was
    // supplied by the caller, so there is nothing to release here.
    if (fd == -1) return;

    munmap(mmapptr,mmapsize);
    close(fd);
}
/*!
    \fn WordsDic::find(const string &word)

    Collects every dictionary line that starts with \a word, comparing the
    characters through tolower().

    The dictionary is a sequence of lines separated by '\n'. Spaces at the
    start of a line are skipped before the comparison. A matching line is
    returned in full (up to, but not including, its '\n' or a NUL byte).
    A NUL byte at the start of a line is treated as the end of the stored
    data, which is how write() terminates the data it stores.

    \param word prefix to look for; a word longer than 255 bytes yields an
                empty result.
    \return the set of matching lines; empty when nothing is mapped or
            nothing matches.
 */
set<string> WordsDic::find(const string &word)
{
    // Exact-case and case-insensitive hits end up in the same set, so a
    // single result container is sufficient.
    set<string> res;
    if (word.length() > 255) return res;
    if (mmapptr == 0) return res;

    const size_t len = word.length();
    const char *const end = mmapptr + mmapsize;
    const char *p = mmapptr;
    while (p < end) {
        // End-of-data marker: do not search unused or stale bytes behind it.
        if (*p == 0) break;
        if (*p == ' ') {
            p ++;
            continue;
        }

        // Find the end of the current line without leaving the region.
        const char *eol = p;
        while (eol < end && *eol != '\n') eol ++;

        // Compare only when the region still holds at least len bytes, so
        // the comparison never reads past the end of the data.
        bool match = static_cast<size_t>(end - p) >= len;
        for (size_t i = 0;match && i < len;i ++) {
            // tolower() is only defined for unsigned char values (and EOF).
            match = tolower(static_cast<unsigned char>(p[i])) ==
                    tolower(static_cast<unsigned char>(word[i]));
        }

        if (match) {
            // The returned entry stops at an embedded NUL, if there is one.
            const char *stop = p;
            while (stop < eol && *stop != 0) stop ++;
            res.insert(string(p,stop));
        }

        // A last line without '\n' ends the scan.
        if (eol == end) break;
        p = eol + 1;
    }
    return res;
}
/*!
\fn WordsDic::write(const string &word)
*/
bool WordsDic::write(const string &word)
{
char *p = mmapptr;
// 重複チェックしつつpを末尾へ。
while(p < mmapptr + mmapsize) {
if (strncmp(word.c_str(),p,word.length()) == 0) return false;
while(*p != '\n') p ++;
p ++;
if (p[0] == 0) break;
}
// 空き領域サイズを確認。
// サイズが足らん時は頭数単語を消してシフトする。
if (((mmapptr + ((mmapsize - 1) * sizeof(char))) - p) < ((word.length() + 1 ) * sizeof(char))) {
char *np = mmapptr;
while(*np != 0) {
while(*np != '\n') np ++;
np ++;
if ((np - mmapptr) > ((word.length() + 1 ) * sizeof(char))) {
char *rp = mmapptr;
while(*np != 0) {
*rp = *np;
np ++;
rp ++;
}
*rp = 0;
p = rp;
break;
}
}
}
// 登録。
strcpy(p,word.c_str());
p[word.length()] = '\n';
p[word.length() + 1] = 0;
return true;
}