// scim-wnn / honoka-plugins / ascii / src / wordsdic.cpp
/***************************************************************************
 *   Copyright (C) 2005 by TAM(Teppei Tamra)                               *
 *   tam-t@par.odn.ne.jp                                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include "wordsdic.h"
using namespace Honoka;

/*!
    \fn WordsDic::WordsDic(const string &filename)
    Opens \a filename read-only and maps its whole content into memory
    (PROT_READ, MAP_PRIVATE).

    If the file cannot be opened, its size cannot be determined, it is empty,
    or mmap() fails, the object is left in a well-defined "no dictionary"
    state: fd == -1, mmapptr == 0 and mmapsize == 0.
 */
WordsDic::WordsDic(const string &filename)
{
    // Start from a fully defined empty state so that every early return
    // below leaves all three members consistent.
    fd = -1;
    mmapptr = 0;
    mmapsize = 0;

    const int file = open(filename.c_str(),O_RDONLY);
    if (file == -1) return;

    // Seeking to the end yields the file size in bytes.
    const off_t size = lseek(file,0,SEEK_END);
    // mmap() rejects a zero length, so an empty file is handled like a failure.
    if (size <= 0) {
        close(file);
        return;
    }

    void *ptr = mmap(0,size,PROT_READ,MAP_PRIVATE,file,0);
    if (ptr == MAP_FAILED) {
        close(file);
        return;
    }

    // Commit the members only once everything has succeeded.
    fd = file;
    mmapptr = static_cast<char *>(ptr);
    mmapsize = size;
}

/*!
    \fn WordsDic::WordsDic(char *ptr,off_t size)
    Uses a caller-supplied buffer of \a size bytes starting at \a ptr as the
    dictionary storage. No file is opened by this constructor.
 */
WordsDic::WordsDic(char *ptr,off_t size)
{
    mmapsize = size;
    mmapptr = ptr;
    // -1 marks "no file descriptor owned by this object".
    fd = -1;
}

/*!
    \fn WordsDic::~WordsDic()
    Unmaps the dictionary and closes its file descriptor when this object
    holds one (fd != -1); otherwise does nothing.
 */
WordsDic::~WordsDic()
{
    // Nothing to release when no descriptor is held.
    if (fd == -1) return;

    munmap(mmapptr,mmapsize);
    close(fd);
}




/*!
    \fn WordsDic::find(const string &word)
    Returns every dictionary record that starts with \a word, compared
    case-insensitively byte by byte.

    The dictionary is a sequence of '\n'-terminated records. Spaces at the
    start of a record are skipped, and a NUL byte at the start of a record
    marks the end of the stored data (the same convention write() uses).
    A returned record is cut at its first NUL byte, if it contains one.

    \param word prefix to look for; an empty prefix matches every record.
    \return the matching records, without their trailing newline. The result
            is empty when no dictionary is mapped or when \a word is longer
            than 255 bytes.
 */
set<string> WordsDic::find(const string &word)
{
    // The container is an ordered set, so exact-case and case-folded matches
    // cannot be ranked separately in it; a single set is all that is needed.
    set<string> res;
    if (word.length() > 255) return res;
    if (mmapptr == 0 || mmapsize <= 0) return res;

    const size_t len = word.length();
    const char *const end = mmapptr + mmapsize;
    const char *p = mmapptr;
    while(p < end) {
        if (*p == ' ') {
            p ++;
            continue;
        }
        // A NUL where a record should begin is the end-of-data marker;
        // anything behind it is unused (possibly stale) space.
        if (*p == 0) break;

        const size_t remain = static_cast<size_t>(end - p);

        // Locate the end of this record without ever leaving the buffer.
        const char *eol = static_cast<const char *>(memchr(p,'\n',remain));
        if (eol == 0) eol = end;

        // Case-insensitive prefix comparison, bounded by the buffer end.
        // The unsigned char cast keeps tolower() defined for bytes >= 0x80.
        bool match = (len <= remain);
        for(size_t i = 0;match && i < len;i ++) {
            const int a = tolower(static_cast<unsigned char>(word[i]));
            const int b = tolower(static_cast<unsigned char>(p[i]));
            if (a != b) match = false;
        }

        if (match) {
            // The record text stops at the newline or at an embedded NUL,
            // whichever comes first.
            const char *nul = static_cast<const char *>(memchr(p,0,static_cast<size_t>(eol - p)));
            const char *stop = (nul != 0) ? nul : eol;
            res.insert(string(p,static_cast<size_t>(stop - p)));
        }

        // Step over the newline, or finish if the last record had none.
        p = (eol < end) ? eol + 1 : end;
    }
    return res;
}

/*!
    \fn WordsDic::write(const string &word)
 */
bool WordsDic::write(const string &word)
{
    char *p = mmapptr;
    // 重複チェックしつつpを末尾へ。
    while(p < mmapptr + mmapsize) {
        if (strncmp(word.c_str(),p,word.length()) == 0) return false;
        while(*p != '\n') p ++;
        p ++;
        if (p[0] == 0) break;
    }
    // 空き領域サイズを確認。
    // サイズが足らん時は頭数単語を消してシフトする。
    if (((mmapptr + ((mmapsize - 1) * sizeof(char))) - p) < ((word.length() + 1 ) * sizeof(char))) {
        char *np = mmapptr;
        while(*np != 0) {
            while(*np != '\n') np ++;
            np ++;
            if ((np - mmapptr) > ((word.length() + 1 ) * sizeof(char))) {
                char *rp = mmapptr;
                while(*np != 0) {
                    *rp = *np;
                    np ++;
                    rp ++;
                }
                *rp = 0;
                p = rp;
                break;
            }
        }
    }
    // 登録。
    strcpy(p,word.c_str());
    p[word.length()] = '\n';
    p[word.length() + 1] = 0;
    return true;
}