Newer
Older
scim-wnn / honoka-plugins / skkdic / src / skkdic.cpp
@tamra tamra on 28 Jun 2006 6 KB 送り仮名を無視。
/***************************************************************************
 *   Copyright (C) 2005 by TAM(Teppei Tamra)                               *
 *   tam-t@par.odn.ne.jp                                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include "skkdic.h"
#include <string.h>

using namespace Honoka;


/*!
    \fn SKKDic::SKKDic(String file)
    Stores the dictionary path, selects EUC-JP for the converter and then
    calls init() to open the file.
 */
SKKDic::SKKDic(String file)
{
    // No descriptor is open yet; init() overwrites this when open() succeeds.
    fd = -1;

    // Conversions done through iconv use EUC-JP.
    iconv.set_encoding("EUC-JP");

    // init() reads the path from filename, so it has to be set before the call.
    filename = file;
    init();
}


/*!
    \fn SKKDic::~SKKDic()
    Unmaps the dictionary and closes its descriptor when one is open.
 */
SKKDic::~SKKDic()
{
    // fd == -1 marks "nothing open"; in that case there is nothing to release.
    if (fd == -1) return;

    munmap(mmapptr,mmapsize);
    close(fd);
}




/*!
    \fn SKKDic::init()
    Opens the file named by filename read-only and maps it into memory.

    On success fd holds the open descriptor, mmapptr points at the mapping
    and mmapsize holds the file size obtained from lseek(). On any failure
    (open, lseek or mmap) the descriptor is closed again and fd is left at
    -1, which the rest of the class uses as the "no dictionary" marker.

    This function does not populate dic_data. The former line-by-line
    loader that followed the mapping code could never run (it sat behind an
    unconditional return) and has been removed.
 */
void SKKDic::init()
{
    fd = open(filename.c_str(),O_RDONLY);
    if (fd == -1) return;

    // Seeking to the end yields the file size, which is the mapping length.
    mmapsize = lseek(fd,0,SEEK_END);
    if (mmapsize == -1) {
        close(fd);
        fd = -1;
        return;
    }

    void *ptr = mmap(0,mmapsize,PROT_READ,MAP_PRIVATE,fd,0);
    if (ptr == MAP_FAILED) {
        close(fd);
        fd = -1;
        return;
    }
    mmapptr = (char *)ptr;
}


/*!
    \fn SKKDic::parser(const WideString data)
    Splits \a data on "/" and returns the pieces in order.

    Empty pieces (produced by a leading "/" or by "//") are dropped. Only
    pieces that are followed by a "/" are returned: any text after the last
    "/" is discarded.
 */
vector<WideString> SKKDic::parser(const WideString data)
{
    vector<WideString> sList;

    // Convert the separator once instead of once per character examined.
    const WideString slash = utf8_mbstowcs(String("/"));

    // start is the index of the first character of the piece being read.
    size_t start = 0;
    for(size_t i = 0;i < data.length();i ++) {
        if (data.at(i) != slash[0]) continue;
        if (i > start) sList.push_back(data.substr(start,i - start));
        start = i + 1;
    }
    return sList;
}


/*!
    \fn SKKDic::find(WideString text)
    Returns the candidates stored for the key \a text.

    An entry found in dic_data with its cache flag set is returned directly.
    Otherwise, when a dictionary file is mapped (fd != -1), \a text is
    converted with iconv and the mapping is scanned line by line from the
    start:
      - a line starting with ";; okuri-ari entries." switches to okuri mode,
        a line starting with ";; okuri-nasi entries." switches it off
        (the scan starts in okuri mode);
      - other lines starting with ";;" are skipped;
      - outside okuri mode a line matches when it starts with the key
        immediately followed by a space;
      - in okuri mode a line matches when it starts with the key followed by
        exactly one more byte and then a space; that extra byte is dropped
        before the line is parsed.
    The candidates of the first matching line are returned. An empty vector
    is returned when nothing matches or no file is mapped.

    Every read is limited to the current line, which itself is limited to
    the mapped range, so the scan never touches memory past
    mmapptr + mmapsize even if the file lacks a final newline. A NUL byte is
    treated as a line terminator, like '\n'.
 */
const vector<SKKDicEntryData> SKKDic::find(WideString text)
{
    map<WideString,SKKDicEntry>::iterator it = dic_data.find(text);
    if ((it != dic_data.end()) && it->second.cache) return it->second.data;

    vector<SKKDicEntryData> d;
    if (fd == -1) return d;

    // Key in the byte encoding used for comparison against the mapped file.
    String t;
    iconv.convert(t,text);
    const size_t keyLen = t.length();

    const char *const end = mmapptr + mmapsize;
    const char *p = mmapptr;
    bool okuri = true;
    while(p < end) {
        // Delimit the current line inside the mapping before looking at it.
        const char *line = p;
        const char *eol = line;
        while((eol < end) && (*eol != '\n') && (*eol != 0)) eol ++;
        const size_t lineLen = static_cast<size_t>(eol - line);
        // Step over the terminator, but never beyond the end of the mapping.
        p = (eol < end) ? (eol + 1) : end;

        if ((lineLen >= 21) && (strncmp(line,";; okuri-ari entries.",21) == 0)) {
            okuri = true;
            continue;
        }
        if ((lineLen >= 22) && (strncmp(line,";; okuri-nasi entries.",22) == 0)) {
            okuri = false;
            continue;
        }
        // Remaining comment lines never hold an entry.
        if ((lineLen >= 2) && (line[0] == ';') && (line[1] == ';')) continue;

        // Position where the space separating key and candidates must be:
        // right after the key, or one byte later in okuri mode.
        const size_t sepPos = okuri ? (keyLen + 1) : keyLen;
        if ((lineLen <= sepPos) || (line[sepPos] != ' ')) continue;
        if (strncmp(line,t.c_str(),keyLen) != 0) continue;

        // Copy the line, leaving out the single byte after the key in okuri mode.
        String s;
        for(size_t i = 0;i < lineLen;i ++) {
            if (okuri && (i == keyLen)) continue;
            s += line[i];
        }

        WideString w;
        iconv.convert(w,s);
        // The converted line must at least contain the key and the separator;
        // otherwise substr() below would throw, so treat it as "no match".
        if (w.length() <= text.length()) continue;

        vector<WideString> l = parser(w.substr(text.length() + 1));
        SKKDicEntry dic;
        for(unsigned int i = 0;i < l.size();i ++) {
            SKKDicEntryData e = annotationParser(l[i]);
            dic.data.push_back(e);
        }
        // Cache the result? (left disabled)
        // dic.cache = true;
        // dic.okuri = okuri;
        // dic_data.insert(pair<WideString,SKKDicEntry>(text,dic));
        return dic.data;
    }
    return d;
}


/*!
    \fn SKKDic::annotationParser(WideString)
    Splits one candidate string into candidate text and annotation.

    The string is split at the first ";" that is neither its first nor its
    last character: the part before it becomes kouho, the part after it
    becomes annotation. When there is no such ";" (including every string
    of two characters or fewer) the whole string becomes kouho and
    annotation is left as default-constructed.
 */
SKKDicEntryData SKKDic::annotationParser(WideString l)
{
    SKKDicEntryData e;
    if (l.length() > 2) {
        // Convert the separator once instead of once per character examined.
        const WideString semicolon = utf8_mbstowcs(String(";"));
        // j + 1 < length keeps the last character out of the search range.
        for(size_t j = 1;j + 1 < l.length();j ++) {
            if (l.at(j) != semicolon[0]) continue;
            e.kouho = l.substr(0,j);
            e.annotation = l.substr(j + 1);
            return e;
        }
    }
    // No usable separator: the whole string is the candidate.
    e.kouho = l;
    return e;
}