/***************************************************************************
 *   Copyright (C) 2005 by TAM(Teppei Tamra)                               *
 *   tam-t@par.odn.ne.jp                                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
#include "wordsdic.h"

using namespace Honoka;

/*!
    \fn WordsDic::WordsDic(const string &filename)
    Opens \a filename read-only and maps its whole contents into memory
    (PROT_READ, MAP_PRIVATE).

    On any failure (open, lseek, mmap, or an empty file) the object is left
    in a well-defined empty state: mmapptr == 0, mmapsize == 0, fd == -1.
 */
WordsDic::WordsDic(const string &filename)
{
    // Establish the empty state first so that every early return below
    // leaves all three members initialized.
    mmapptr = 0;
    mmapsize = 0;

    fd = open(filename.c_str(),O_RDONLY);
    if (fd == -1) return;

    // The offset of the end of the file is the number of bytes to map.
    mmapsize = lseek(fd,0,SEEK_END);
    if (mmapsize <= 0) {
        // lseek() failed, or the file is empty (mmap() rejects length 0).
        close(fd);
        fd = -1;
        mmapsize = 0;
        return;
    }

    void *ptr = mmap(0,mmapsize,PROT_READ,MAP_PRIVATE,fd,0);
    if (ptr == MAP_FAILED) {
        close(fd);
        fd = -1;
        mmapsize = 0;
        return;
    }
    mmapptr = static_cast<char *>(ptr);
}

/*!
    \fn WordsDic::WordsDic(char *ptr,off_t size)
    Wraps a caller-supplied buffer of \a size bytes starting at \a ptr.
    fd is set to -1, so the destructor neither unmaps nor closes anything
    for objects built this way.
 */
WordsDic::WordsDic(char *ptr,off_t size)
{
    fd = -1;
    mmapptr = ptr;
    mmapsize = size;
}

/*!
    \fn WordsDic::~WordsDic()
    Releases the mapping and the descriptor created by the filename
    constructor. fd != -1 only when that constructor fully succeeded.
 */
WordsDic::~WordsDic()
{
    if (fd != -1) {
        munmap(mmapptr,mmapsize);
        close(fd);
    }
}

/*!
\fn WordsDic::find(const string &word) */ set<string> WordsDic::find(const string &word) { set<string> res,bres; if (word.length() > 255) return res; if (mmapptr == 0) return res; char w[256],ow[256]; for(unsigned int i = 0;i < word.length();i ++) { w[i] = (char)tolower(word[i]); ow[i] = word[i]; } w[word.length()] = 0; ow[word.length()] = 0; char *p = mmapptr; while(p < mmapptr + mmapsize) { if (p[0] == ' ') { p ++; continue; } char b[256],ob[256]; for(unsigned int i = 0;i < word.length();i ++) { b[i] = (char)tolower(p[i]); ob[i] = p[i]; } if (strncmp(w,b,word.length()) == 0) { string s; for(unsigned int i = 0;p[i] != '\n';i ++) { if (p[i] == 0) break; s += p[i]; } if (strncmp(ow,ob,word.length()) == 0) res.insert(s); else bres.insert(s); } while(*p != '\n') p ++; p ++; } for(set<string>::iterator it = bres.begin();it != bres.end();it ++) res.insert(*it); return res; } /*! \fn WordsDic::write(const string &word) */ bool WordsDic::write(const string &word) { char *p = mmapptr; // 重複チェックしつつpを末尾へ。 while(p < mmapptr + mmapsize) { if (strncmp(word.c_str(),p,word.length()) == 0) return false; while(*p != '\n') p ++; p ++; if (p[0] == 0) break; } // 空き領域サイズを確認。 // サイズが足らん時は頭数単語を消してシフトする。 if (((mmapptr + ((mmapsize - 1) * sizeof(char))) - p) < ((word.length() + 1 ) * sizeof(char))) { char *np = mmapptr; while(*np != 0) { while(*np != '\n') np ++; np ++; if ((np - mmapptr) > ((word.length() + 1 ) * sizeof(char))) { char *rp = mmapptr; while(*np != 0) { *rp = *np; np ++; rp ++; } *rp = 0; p = rp; break; } } } // 登録。 strcpy(p,word.c_str()); p[word.length()] = '\n'; p[word.length() + 1] = 0; return true; }