/***************************************************************************
 *   Copyright (C) 2005 by TAM(Teppei Tamra)                               *
 *   tam-t@par.odn.ne.jp                                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.              *
 ***************************************************************************/
#include "skkdic.h"

#include <string.h>

using namespace Honoka;

/*!
    \fn SKKDic::SKKDic(String file)

    Remembers the dictionary path, selects "EUC-JP" as the encoding used by
    the member converter, and then calls init().

    \param file  Path of the dictionary file.
 */
SKKDic::SKKDic(String file)
{
    // fd == -1 is the "nothing open" marker that init(), find() and the
    // destructor all test, so it has to be in place before init() runs.
    fd = -1;
    filename = file;
    iconv.set_encoding("EUC-JP");
    init();
}

/*!
    \fn SKKDic::~SKKDic()

    Unmaps the dictionary and closes its descriptor when one is open.
 */
SKKDic::~SKKDic()
{
    // Nothing was opened (or opening failed): there is nothing to release.
    if (fd == -1)
        return;

    munmap(mmapptr, mmapsize);
    close(fd);
}

/*!
\fn SKKDic::init() */ void SKKDic::init() { if (1) { fd = open(filename.c_str(),O_RDONLY); if (fd == -1) return; mmapsize = lseek(fd,0,SEEK_END); if (mmapsize == -1) { close(fd); fd = -1; return; } void *ptr = mmap(0,mmapsize,PROT_READ,MAP_PRIVATE,fd,0); if (ptr == MAP_FAILED) { close(fd); fd = -1; return; } mmapptr = (char *)ptr; return; } bool okuri = true; FILE *f = fopen(filename.c_str(),"r"); if (!f) return; while(-1) { char s[1024]; if(fgets(s,1024,f) == NULL) break; if (String(s) == String(";; okuri-ari entries.")) { okuri = true; continue; } else if (String(s) == String(";; okuri-nasi entries.")) { okuri = false; continue; } if (String(s).length() >= 2) if (String(s).substr(0,2) == ";;") continue; WideString wstr,ent; iconv.convert(wstr,String(s)); if (!wstr.length()) continue; for(unsigned int i = 0;i < wstr.length();i ++) { if (wstr.substr(i,1) == utf8_mbstowcs(String(" "))) { ent = wstr.substr(0,i); break; } } SKKDicEntry dic; vector<WideString> sList = parser(wstr); for(unsigned int i = 1;i < sList.size();i ++) { SKKDicEntryData e = annotationParser(sList[i]); dic.data.push_back(e); } dic.cache = true; dic.okuri = okuri; dic_data.insert(pair<WideString,SKKDicEntry>(ent,dic)); } fclose(f); } /*! \fn SKKDic::parser(const WideString data) */ vector<WideString> SKKDic::parser(const WideString data) { uint pos = 0,count = 0; vector<WideString> sList; // "/"でsplit。 while(-1) { if ((pos + count) >= data.length()) break; if (data.at(pos + count) == utf8_mbstowcs(String("/"))[0]) { if (count) sList.push_back(data.substr(pos,count)); pos += (count + 1); count = 0; continue; } count ++; } return sList; } /*! 
\fn SKKDic::find(WideString text) */ const vector<SKKDicEntryData> SKKDic::find(WideString text) { vector<SKKDicEntryData> d; map<WideString,SKKDicEntry>::iterator it = dic_data.find(text); if (it != dic_data.end()) { if (it->second.cache) return it->second.data; } if (fd != -1) { char *p = mmapptr; bool okuri = true; String t; iconv.convert(t,text); while(p < mmapptr + mmapsize) { WideString w; if (strncmp(p,";; okuri-ari entries.",21) == 0) okuri = true; else if (strncmp(p,";; okuri-nasi entries.",22) == 0) okuri = false; else if ((strncmp(p,t.c_str(),t.length()) == 0) && (p[t.length()] == ' ') && (strncmp(p,";;",2) != 0)) { String s; for(unsigned int i = 0;p[i] != '\n';i ++) { if (p[i] == 0) break; s += p[i]; } iconv.convert(w,s); vector<WideString> l = parser(w.substr(text.length() + 1)); SKKDicEntry dic; for(unsigned int i = 0;i < l.size();i ++) { SKKDicEntryData e = annotationParser(l[i]); dic.data.push_back(e); } // キャッシュする? // dic.cache = true; // dic.okuri = okuri; // dic_data.insert(pair<WideString,SKKDicEntry>(text,dic)); return dic.data; } while(p[0] != '\n') { p ++; if (p[0] == 0) break; } p ++; } } return d; } /*! \fn SKKDic::annotationParser(WideString) */ SKKDicEntryData SKKDic::annotationParser(WideString l) { SKKDicEntryData e; if (l.length() <= 2) { e.kouho = l; return e; } for(unsigned int j = 1;j < (l.length() - 1);j ++) { if (l.at(j) == utf8_mbstowcs(String(";"))[0]) { e.annotation = l.substr(j + 1); e.kouho = l.substr(0,j); break; } } if (!e.kouho.length()) e.kouho = l; return e; }