/***************************************************************************
* Copyright (C) 2005 by TAM(Teppei Tamra) *
* tam-t@par.odn.ne.jp *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include "skkdic.h"
#include <string.h>
using namespace Honoka;
/*!
 \brief Builds a dictionary object bound to the given file.

 \param file Path of the dictionary file; it is stored in \c filename and
             opened by init().
*/
SKKDic::SKKDic(String file)
{
    // -1 marks "no file mapped"; init() overwrites it when open() succeeds.
    fd = -1;
    filename = file;

    // Text read from the dictionary file is converted from EUC-JP.
    iconv.set_encoding("EUC-JP");

    init();
}
/*!
 \brief Releases the memory mapping and the file descriptor, if any.
*/
SKKDic::~SKKDic()
{
    // fd stays at -1 whenever init() did not end up with a usable mapping,
    // in which case there is nothing to unmap or close.
    if (fd == -1) return;

    munmap(mmapptr,mmapsize);
    close(fd);
}
/*!
 \fn SKKDic::init()
 \brief Opens the dictionary file and maps it read-only into memory.

 On success \c fd holds the open descriptor, \c mmapptr points at the start
 of the mapping and \c mmapsize holds the file size in bytes.

 On any failure (open, size query or mmap) the descriptor is closed and
 \c fd is left at -1, which is the value find() and the destructor test
 before touching the mapping.
*/
void SKKDic::init()
{
    fd = open(filename.c_str(),O_RDONLY);
    if (fd == -1) return;

    // Seeking to the end yields the file size, which is the mapping length.
    mmapsize = lseek(fd,0,SEEK_END);
    if (mmapsize == -1) {
        close(fd);
        fd = -1;
        return;
    }

    // A zero-length file makes mmap() fail, so an empty dictionary ends up
    // in the same "not mapped" state as an unreadable one.
    void *ptr = mmap(0,mmapsize,PROT_READ,MAP_PRIVATE,fd,0);
    if (ptr == MAP_FAILED) {
        close(fd);
        fd = -1;
        return;
    }
    mmapptr = static_cast<char *>(ptr);
}
/*!
 \fn SKKDic::parser(const WideString data)
 \brief Splits a string on '/' and returns the non-empty pieces.

 \param data Text to split.
 \return The pieces in order of appearance. Empty pieces (two adjacent
         slashes, or a leading slash) are skipped, and any text after the
         last '/' is not returned because a piece is only emitted when its
         closing '/' is seen.
*/
vector<WideString> SKKDic::parser(const WideString data)
{
    vector<WideString> pieces;
    const WideString slash = utf8_mbstowcs(String("/"));

    // 'begin' is the index of the first character of the piece being read.
    uint begin = 0;
    for (uint i = 0; i < data.length(); ++i) {
        if (data[i] != slash[0]) continue;

        // Split on "/": emit the piece that just ended, unless it is empty.
        if (i > begin) pieces.push_back(data.substr(begin,i - begin));
        begin = i + 1;
    }
    return pieces;
}
/*!
 \fn SKKDic::find(WideString text)
 \brief Looks up the candidates stored for a reading.

 Entries present in \c dic_data with their \c cache flag set are returned
 directly. Otherwise the memory-mapped dictionary file is scanned line by
 line for the first line that starts with \a text (converted through
 \c iconv) followed by a space. While inside the section introduced by
 ";; okuri-ari entries." exactly one extra byte is expected between the
 reading and the space; that byte is dropped before the line is parsed
 (presumably it is the okurigana consonant -- confirm against the
 dictionary format).

 \param text Reading to look up.
 \return The parsed candidates of the first matching line, or an empty
         vector when nothing matches or no file is mapped.
*/
const vector<SKKDicEntryData> SKKDic::find(WideString text)
{
    // In-memory entries take precedence over the file scan.
    map<WideString,SKKDicEntry>::iterator it = dic_data.find(text);
    if ((it != dic_data.end()) && it->second.cache) return it->second.data;

    vector<SKKDicEntryData> d;
    if (fd == -1) return d;

    String t;
    iconv.convert(t,text);
    const size_t tlen = t.length();

    // The mapping is NOT guaranteed to be NUL-terminated (a file whose size
    // is an exact multiple of the page size has no zero padding), so every
    // read below is bounded by 'end'.
    const char *const end = mmapptr + mmapsize;
    bool okuri = true;

    const char *p = mmapptr;
    while (p < end) {
        const size_t remain = static_cast<size_t>(end - p);

        if ((remain >= 21) && (strncmp(p,";; okuri-ari entries.",21) == 0)) {
            okuri = true;
        } else if ((remain >= 22) && (strncmp(p,";; okuri-nasi entries.",22) == 0)) {
            okuri = false;
        } else {
            // Offset at which the separating space must appear: directly
            // after the reading, or one byte later in the okuri-ari section.
            const size_t sep = okuri ? (tlen + 1) : tlen;

            if ((remain > sep) &&
                (strncmp(p,t.c_str(),tlen) == 0) &&
                (p[sep] == ' ') &&
                ((remain < 2) || (strncmp(p,";;",2) != 0))) {
                // Copy the line (up to newline, NUL or end of mapping),
                // leaving out the extra okuri byte when there is one.
                String s;
                for (size_t i = 0; (i < remain) && (p[i] != '\n') && (p[i] != 0); i ++) {
                    if ((okuri == false) || (i != tlen))
                        s += p[i];
                }

                WideString w;
                iconv.convert(w,s);

                // substr() throws when the start offset exceeds the length,
                // which happens if the conversion failed or the line is
                // shorter than the match suggested; skip such lines.
                if (w.length() >= text.length() + 1) {
                    vector<WideString> l = parser(w.substr(text.length() + 1));
                    for (unsigned int i = 0; i < l.size(); i ++) {
                        d.push_back(annotationParser(l[i]));
                    }
                    // Results are deliberately not inserted into dic_data;
                    // whether to cache them here is an open question.
                    return d;
                }
            }
        }

        // Advance to the end of the current line. A NUL found after the
        // first byte also ends the line, as does the end of the mapping.
        while (*p != '\n') {
            ++p;
            if ((p == end) || (*p == 0)) break;
        }
        // Step over the terminator unless the mapping is already exhausted.
        if (p < end) ++p;
    }
    return d;
}
/*!
 \fn SKKDic::annotationParser(WideString)
 \brief Separates a candidate string into candidate text and annotation.

 The string is split at the first ';' that is neither its first nor its
 last character: the part before goes to \c kouho, the part after to
 \c annotation. Strings of at most two characters, and strings without
 such a ';', are stored unchanged in \c kouho.

 \param l One candidate as produced by parser().
 \return The filled-in entry data.
*/
SKKDicEntryData SKKDic::annotationParser(WideString l)
{
    SKKDicEntryData e;
    const WideString::size_type len = l.length();

    // Too short to hold "candidate;annotation".
    if (len <= 2) {
        e.kouho = l;
        return e;
    }

    // Search from index 1 so a leading ';' is never taken as the separator;
    // a ';' in the very last position is rejected by the range check.
    const WideString semicolon = utf8_mbstowcs(String(";"));
    const WideString::size_type split = l.find(semicolon[0],1);
    if ((split != WideString::npos) && (split < len - 1)) {
        e.kouho = l.substr(0,split);
        e.annotation = l.substr(split + 1);
        return e;
    }

    // No usable separator: the whole string is the candidate.
    if (e.kouho.empty()) e.kouho = l;
    return e;
}