package morpheme;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import double_array.Builder;
import double_array.Searcher;
import double_array.BinaryFileWriter;
import double_array.BinaryFileReader;

/**
 * Word dictionary: either generates the binary dictionary files from the
 * textual source files, or loads previously generated files and answers
 * lookups against them.
 *
 * <p>Files used under the output directory:
 * <ul>
 *   <li>{@code word->id}     - key index built by {@code double_array.Builder}
 *                              and read by {@code double_array.Searcher}</li>
 *   <li>{@code word.dat}     - the data strings of all words, concatenated</li>
 *   <li>{@code word.inf}     - one record of five ints per word
 *                              (left id, right id, cost, data offset, data length)</li>
 *   <li>{@code word.ary.idx} - for each key id, the index of its first record
 *                              in {@code word.inf}, plus one trailing end index</li>
 * </ul>
 */
public class WordDic {
    /** Size in bytes of one int as stored in the binary files. */
    private static final int INT_BYTES = 4;

    /** Size in bytes of one record in word.inf: five ints per word. */
    private static final int WORD_RECORD_BYTES = 5 * INT_BYTES;

    /**
     * Prefix put in front of keys taken from unk.def so that they cannot
     * collide with keys of ordinary words (stop-gap measure).
     */
    private static final String UNKNOWN_KEY_PREFIX = "\t";

    // ---- state used when generating a dictionary ----
    private String encoding;
    private String inputDir;
    private String outputDir;

    // XXX: bad interface -> WordDic.Generator
    /**
     * Creates an instance for dictionary generation
     * ({@link #genWordIdMap()} followed by {@link #genWordInfo()}).
     *
     * @param inputDir  directory containing unk.def and the *.csv word files
     * @param outputDir directory the binary dictionary files are written to
     * @param encoding  character encoding of the input files
     */
    public WordDic(String inputDir, String outputDir, String encoding) {
        this.inputDir = inputDir;
        this.outputDir = outputDir;
        this.encoding = encoding;
    }

    // ---- state used when searching a generated dictionary ----
    private Searcher da;     // key -> key id
    private String dat;      // contents of word.dat
    private Word[] words;    // contents of word.inf
    private int[] indices;   // contents of word.ary.idx

    /**
     * Loads a previously generated dictionary for searching.
     *
     * @param outputDir directory containing the generated dictionary files
     * @throws IOException if one of the files cannot be read
     */
    public WordDic(String outputDir) throws IOException {
        da = new Searcher(outputDir+"/word->id");
        dat = readData(outputDir+"/word.dat");
        words = readWords(outputDir+"/word.inf");
        indices = readIndices(outputDir+"/word.ary.idx");
    }

    /**
     * Search callback that appends, for every key reported by the searcher,
     * one node per word registered under that key.
     */
    class Collect implements Searcher.Callback {
        public List<MorphemeNode> ms;

        public Collect(List<MorphemeNode> result) { ms = result; }

        /**
         * @param offset value handed over by the searcher and passed on
         *               unchanged as the second MorphemeNode argument
         *               (presumably the matched length - confirm against Searcher)
         * @param id     key id; selects the range indices[id]..indices[id+1]-1 of words
         */
        public void call(int offset, int id) {
            final int end = indices[id+1];
            for (int i = indices[id]; i < end; i++) {
                ms.add(new MorphemeNode(words[i], offset));
            }
        }
    }

    /**
     * Appends to {@code result} a node for every word whose key is reported
     * by the searcher's common-prefix search over {@code text}.
     */
    public void search(String text, List<MorphemeNode> result) {
        da.eachCommonPrefix(text, new Collect(result));
    }

    /**
     * Appends to {@code result} a node for every word registered under the
     * key id {@code wordId}; {@code wordLength} is passed to each node.
     */
    public void searchFromId(int wordId, int wordLength, List<MorphemeNode> result) {
        final int end = indices[wordId+1];
        for (int i = indices[wordId]; i < end; i++) {
            result.add(new MorphemeNode(words[i], wordLength));
        }
    }

    /**
     * Returns the entry at position {@code wordId} of the word table.
     * Note that the argument indexes the table directly; it is not mapped
     * through the per-key index used by {@link #searchFromId}.
     */
    public Word getWord(int wordId) {
        return words[wordId];
    }

    /** Returns the slice of the data string that belongs to {@code w}. */
    public String getData(Word w) {
        return dat.substring(w.indexOfData, w.indexOfData+w.dataLength);
    }

    /** Reads the whole file as a sequence of two-byte chars. */
    private String readData(String path) throws IOException {
        final BinaryFileReader br = new BinaryFileReader(path);
        try {
            final int charCount = (int)(br.length()/2);
            final StringBuilder sb = new StringBuilder(charCount);
            for (int i = 0; i < charCount; i++) {
                sb.append(br.readChar());
            }
            return sb.toString();
        } finally {
            br.close();
        }
    }

    /**
     * Reads the word table. Each record consists of five ints, matching what
     * {@link #genWordInfo()} writes, so the record count is the file length
     * divided by {@link #WORD_RECORD_BYTES}.
     */
    private Word[] readWords(String path) throws IOException {
        final BinaryFileReader br = new BinaryFileReader(path);
        try {
            final int size = (int)(br.length()/WORD_RECORD_BYTES);
            final Word[] ws = new Word[size];
            for (int i = 0; i < size; i++) {
                ws[i] = new Word(br.readInt(),br.readInt(),br.readInt(),br.readInt(),br.readInt());
            }
            return ws;
        } finally {
            br.close();
        }
    }

    /** Reads the whole file as an array of ints. */
    private int[] readIndices(String path) throws IOException {
        final BinaryFileReader br = new BinaryFileReader(path);
        try {
            final int size = (int)(br.length()/INT_BYTES);
            final int[] idx = new int[size];
            for (int i = 0; i < size; i++) {
                idx[i] = br.readInt();
            }
            return idx;
        } finally {
            br.close();
        }
    }

    /**
     * Collects the keys (text before the first comma of every line) from
     * unk.def and from every *.csv file in the input directory, builds the
     * key index from them and saves it as {@code word->id} in the output
     * directory.
     *
     * <p>An IOException while reading one input file is reported on stderr
     * and that file is abandoned; processing continues with the next file.
     *
     * @throws IOException if the input directory cannot be listed
     *                     (other causes depend on Builder)
     */
    public void genWordIdMap() throws IOException {
        System.out.println("== collect key ==");
        final List<String> keyList = new ArrayList<String>();

        // Collect keys from the unknown-word definition.
        try {
            collectKeys(new ReadLine(inputDir+"/unk.def", encoding), UNKNOWN_KEY_PREFIX, keyList);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Collect keys from the word dictionaries.
        for (File csvFile : listCsvFiles()) {
            try {
                collectKeys(new ReadLine(csvFile, encoding), "", keyList);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        System.out.println("== build index ==");
        final Builder bld = new Builder(keyList);
        bld.build();

        System.out.println("== save ==");
        bld.save(outputDir+"/word->id");
    }

    /**
     * Appends {@code keyPrefix + key} to {@code keys} for every line of
     * {@code rl}, where key is the text before the first comma. The reader
     * is always closed, even when a line cannot be processed.
     */
    private void collectKeys(ReadLine rl, String keyPrefix, List<String> keys) throws IOException {
        try {
            String line;
            while ((line = rl.read()) != null) {
                keys.add(keyPrefix + line.substring(0, line.indexOf(',')));
            }
        } finally {
            rl.close();
        }
    }

    /**
     * Lists the *.csv files of the input directory.
     *
     * @throws IOException if the input directory cannot be listed
     *                     (File.listFiles returned null)
     */
    private File[] listCsvFiles() throws IOException {
        final File[] csvFiles = new File(inputDir).listFiles(new onlyCsv());
        if (csvFiles == null) {
            throw new IOException("cannot list input directory: " + inputDir);
        }
        return csvFiles;
    }

    /**
     * A word that also keeps its data string in memory until it is written.
     * The original author noted (2010/03/08) that this should no longer be
     * needed because Word itself now carries the data length; it is still
     * used by {@link #genWordInfo()} to hold the string.
     */
    class WordWithData extends Word {
        public String data;

        public WordWithData(int lid, int rid, int c, int idx, String data) {
            super(lid,rid,c,idx,data.length());
            this.data=data;
        }
    }

    /**
     * Generates {@code word.dat}, {@code word.inf} and {@code word.ary.idx}
     * in the output directory from unk.def and the *.csv files of the input
     * directory. Requires the {@code word->id} file written by
     * {@link #genWordIdMap()}.
     *
     * <p>Every input line has the form {@code key,leftId,rightId,cost,data}.
     * Words are grouped by the id the key index assigns to their key and are
     * written in ascending id order.
     *
     * <p>An IOException while reading one input file is reported on stderr
     * and that file is abandoned; processing continues with the next file.
     *
     * @throws IOException if the key index cannot be opened, the input
     *                     directory cannot be listed, or an output file
     *                     cannot be written
     */
    public void genWordInfo() throws IOException {
        final Searcher wid = new Searcher(outputDir+"/word->id");

        // One bucket of words per key id.
        final int keyCount = wid.size();
        final List<List<WordWithData>> ws = new ArrayList<List<WordWithData>>(keyCount);
        for (int i = 0; i < keyCount; i++) {
            ws.add(new ArrayList<WordWithData>());
        }

        System.out.println("== 1 ==");

        // Collect word data from the unknown-word definition.
        try {
            collectWords(new ReadLine(inputDir+"/unk.def", encoding), UNKNOWN_KEY_PREFIX, wid, ws);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Collect word data from the word dictionaries.
        for (File csvFile : listCsvFiles()) {
            try {
                collectWords(new ReadLine(csvFile, encoding), "", wid, ws);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        // Write the word records and their data strings. The data offset is
        // assigned here, in output order, as the running total of the data
        // lengths written so far.
        System.out.println("== 2 ==");
        final BinaryFileWriter wdat = new BinaryFileWriter(outputDir+"/word.dat");
        try {
            final BinaryFileWriter winf = new BinaryFileWriter(outputDir+"/word.inf");
            try {
                int dataIndex = 0;
                for (List<WordWithData> wlist : ws) {
                    for (WordWithData w : wlist) {
                        winf.writeInt(w.leftId);
                        winf.writeInt(w.rightId);
                        winf.writeInt(w.cost);
                        winf.writeInt(dataIndex);
                        winf.writeInt(w.data.length());    // XXX: tidy up
                        wdat.writeString(w.data);
                        dataIndex += w.data.length();
                    }
                }
            } finally {
                winf.close();
            }
        } finally {
            wdat.close();
        }

        // Write, for every key id, the index of its first word record,
        // followed by one trailing entry holding the total record count.
        System.out.println("== 3 ==");
        final BinaryFileWriter wary = new BinaryFileWriter(outputDir+"/word.ary.idx");
        try {
            int begIndex = 0;
            for (List<WordWithData> wlist : ws) {
                wary.writeInt(begIndex);
                begIndex += wlist.size();
            }
            wary.writeInt(begIndex);
        } finally {
            wary.close();
        }
    }

    /**
     * Parses every line of {@code rl} as {@code key,leftId,rightId,cost,data}
     * and adds the resulting word to the bucket of the id that {@code wid}
     * returns for {@code keyPrefix + key}. The reader is always closed, even
     * when a line cannot be processed.
     */
    private void collectWords(ReadLine rl, String keyPrefix, Searcher wid,
                              List<List<WordWithData>> ws) throws IOException {
        try {
            String s;
            while ((s = rl.read()) != null) {
                final int p1 = s.indexOf(',');      // end of key
                final int p2 = s.indexOf(',',p1+1); // end of left id
                final int p3 = s.indexOf(',',p2+1); // end of right id
                final int p4 = s.indexOf(',',p3+1); // end of cost

                final int id = wid.search(keyPrefix + s.substring(0,p1)).id();
                ws.get(id).add(new WordWithData(Integer.parseInt(s.substring(p1+1,p2)),
                                                Integer.parseInt(s.substring(p2+1,p3)),
                                                Integer.parseInt(s.substring(p3+1,p4)),
                                                0, // placeholder: the real data offset is assigned when writing
                                                s.substring(p4+1)));
            }
        } finally {
            rl.close();
        }
    }

    /** Accepts regular files whose path ends in ".csv". */
    class onlyCsv implements FileFilter {
        public boolean accept(File pathname) {
            return pathname.isFile() && pathname.toString().matches(".*\\.csv$");
        }
    }
}