/*
 * created on 2004/10 by Shinya Kitaguchi (kitaguchi@ks.cs.inf.shizuoka.ac.jp)
 * 
 * ToDo:
 *
 */
/*
 * LICENSE:
 * 
 *     Copyright (C) 2006 Hidenao Abe (COIN Project)
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License along
 *    with this program; if not, write to the Free Software Foundation, Inc.,
 *    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
package coin.dataset_loader;

import java.util.StringTokenizer;
import java.util.Vector;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.FileNotFoundException;
import java.io.IOException;

import coin.Attribute;
import coin.Instance;

/**
 * Loads a data set written in the ARFF text format.
 * <p>
 * The loader collects the <code>@attribute</code> declarations and the rows
 * that follow the <code>@data</code> marker, and uses them to fill the
 * inherited <code>attributeInfo</code> and <code>instances</code> members.
 * The last declared attribute is always parsed as a nominal class attribute.
 * Lines whose first non-blank character is the comment character are ignored.
 *
 * @author Shinya Kitaguchi
 * @author Hidenao Abe (hidenao@users.sourceforge.jp)
 */
public class ArffDatasetLoader extends DatasetLoader {
	
	/** Token that marks a missing value in a data row. */
	private static final String UNKNOWN_VALUE="?";
	
	/** Header keyword that introduces an attribute declaration (matched ignoring case). */
	private static final String ATTRIBUTE_KEYWORD="@attribute";
	
	/** Header keyword that starts the data section (matched ignoring case). */
	private static final String DATA_KEYWORD="@data";
	
	/** Character encoding name passed to the reader of the ARFF file. */
	private static final String ENCODING="JISAutoDetect";
	
	/**
	 * Loads the given ARFF file and uses the last attribute as the class.
	 *
	 * @param arff_name path of the ARFF file; also used as the data set name
	 * @throws IllegalArgumentException if the file cannot be read or declares
	 *         no attribute
	 */
	public ArffDatasetLoader(String arff_name){
		
		super();
		
		this.commentChar='%';
		this.fieldDelimiter=",";
		
		load(arff_name);
		
		// set class index
		this.attributeInfo.setClassIndex(this.attributeInfo.getNumAttributes() -1);
		
		// get minimum and maximum values on numeric attributes
		this.setMaxMinValues();
	}
	
	/**
	 * Loads the given ARFF file and registers <code>class_index</code> as the
	 * class index. Note that the last declared attribute is still the one that
	 * is parsed as a nominal class attribute, whatever index is given here.
	 *
	 * @param arff_name path of the ARFF file; also used as the data set name
	 * @param class_index index handed to <code>attributeInfo.setClassIndex</code>
	 * @throws IllegalArgumentException if the file cannot be read or declares
	 *         no attribute
	 */
	public ArffDatasetLoader(String arff_name, int class_index){
		
		super();
		
		this.commentChar='%';
		this.fieldDelimiter=",";
		
		load(arff_name);
		
		// set class index
		this.attributeInfo.setClassIndex(class_index);
		
		// get minimum and maximum values on numeric attributes
		this.setMaxMinValues();
	}
	
	/**
	 * Reads the file, parses attributes and instances, and names the data set.
	 * Shared by both constructors; the class index is set by the caller.
	 */
	private void load(String arff_name){
		
		Vector att_buf = new Vector();
		Vector data_buf = new Vector();
		
		readArffFile(arff_name, att_buf, data_buf);
		if(att_buf.isEmpty()){
			throw new IllegalArgumentException(
					"No @attribute declarations found in ARFF file: " + arff_name);
		}
		
		// read Attributes + class
		readAttributes(att_buf);
		// read Instances
		readInstances(data_buf);
		
		// set dataset's(relation) name to instances and attributeInfo
		this.instances.setName(arff_name);
		this.attributeInfo.setName(arff_name);
		
		// set attribute information to the data set
		this.instances.setAttributeInfo(this.attributeInfo);
	}
	
	/**
	 * Splits the ARFF file into attribute declarations and data rows.
	 * <p>
	 * Before the <code>@data</code> marker only <code>@attribute</code> lines
	 * are kept (with the keyword removed); every other header line is ignored.
	 * After the marker every non-blank line is kept unchanged, so comment lines
	 * are still present in <code>data_buf</code> and are filtered later.
	 *
	 * @param arff_name path of the file to read
	 * @param att_buf receives the text following each <code>@attribute</code> keyword
	 * @param data_buf receives the raw lines of the data section
	 */
	private void readArffFile(String arff_name, Vector att_buf, Vector data_buf){
		
		FileInputStream in = null;
		try{
			in = new FileInputStream(arff_name);
			BufferedReader br = new BufferedReader(new InputStreamReader(in, ENCODING));
			
			boolean inData = false;
			String line;
			while ((line = br.readLine()) != null){
				String trimmed = line.trim();
				if(trimmed.length() == 0){
					// blank lines carry no information in either section
					continue;
				}
				if(inData){
					data_buf.add(line);
				}
				else if(startsWithIgnoreCase(trimmed, ATTRIBUTE_KEYWORD)){
					att_buf.add(trimmed.substring(ATTRIBUTE_KEYWORD.length()));
				}
				else if(startsWithIgnoreCase(trimmed, DATA_KEYWORD)){
					inData = true;
				}
			}
		}
		catch(FileNotFoundException fne){
			throw failure("ARFF file not found: " + arff_name, fne);
		}
		catch(IOException ioe){
			throw failure("Cannot read ARFF file: " + arff_name, ioe);
		}
		finally{
			// closing the underlying stream releases the file handle even when
			// the reader could not be created (e.g. unsupported encoding)
			if(in != null){
				try{
					in.close();
				}
				catch(IOException ignored){
					// nothing useful can be done if closing fails
				}
			}
		}
	}
	
	/** Builds the unchecked exception used to report a file that cannot be loaded. */
	private static IllegalArgumentException failure(String message, Throwable cause){
		IllegalArgumentException ex = new IllegalArgumentException(message);
		ex.initCause(cause);
		return ex;
	}
	
	/** Tells whether <code>text</code> begins with <code>prefix</code>, ignoring case. */
	private static boolean startsWithIgnoreCase(String text, String prefix){
		return text.regionMatches(true, 0, prefix, 0, prefix.length());
	}
	
	/**
	 * Tells whether <code>c</code> separates tokens. Tabs are not "space
	 * characters" for {@link Character#isSpaceChar(char)}, so both
	 * classifications are combined.
	 */
	private static boolean isBlankChar(char c){
		return Character.isSpaceChar(c) || Character.isWhitespace(c);
	}
	
	/**
	 * Tells whether a line should be skipped: it is either made of blanks only
	 * or its first non-blank character is the comment character.
	 */
	private boolean isBlankOrComment(String line){
		for(int i=0; i<line.length(); i++){
			char c = line.charAt(i);
			if(!isBlankChar(c)){
				return c == commentChar;
			}
		}
		return true;
	}
	
	/**
	 * Registers every declaration in <code>line_buf</code> with
	 * <code>attributeInfo</code>. The last entry is parsed as the class
	 * attribute; earlier entries that are blank or comments are skipped.
	 *
	 * @param line_buf non-empty list of declaration strings
	 */
	private void readAttributes(Vector line_buf){
		
		int classPos = line_buf.size()-1;
		
		// get attribute
		for(int v=0; v<classPos; v++){
			String line =(String)line_buf.elementAt(v);
			if(!isBlankOrComment(line)){
				attributeInfo.addAttribute(readAttribute(line));
			}
		}
		
		// append class information
		attributeInfo.addAttribute(readClass((String)line_buf.elementAt(classPos)));
		
		// initialize attribute id
		attributeInfo.initAttributeID();
	}
	
	/** Parses an ordinary (non-class) attribute declaration. */
	private Attribute readAttribute(String line){
		return parseDeclaration(line, false);
	}
	
	/** Parses the class attribute declaration, which is always nominal. */
	private Attribute readClass(String line){
		return parseDeclaration(line, true);
	}
	
	/**
	 * Parses the text that follows an <code>@attribute</code> keyword.
	 * <p>
	 * The name is either a run of non-blank characters or a single-quoted
	 * string (which may contain blanks); it ends at a blank, the closing quote
	 * or <code>{</code>. Everything after it, with blanks, quotes and
	 * <code>{</code> removed, forms the type specification. Parsing stops at
	 * the comment character or at <code>}</code>. The name is only set once its
	 * end has been seen.
	 *
	 * @param line declaration text without the keyword
	 * @param isClass <code>true</code> to force a nominal attribute and to
	 *        accept <code>[</code> inside the specification
	 * @return the parsed attribute
	 */
	private Attribute parseDeclaration(String line, boolean isClass){
		
		Attribute result = new Attribute();
		StringBuffer name_buf = new StringBuffer();
		StringBuffer value_buf = new StringBuffer();
		boolean isValue=false, isQuoted=false;
		
		for(int i=0; i<line.length(); i++){
			char c = line.charAt(i);
			boolean blank = isBlankChar(c);
			
			// leading blanks, and every blank once the name has been consumed
			if(blank && name_buf.length() == 0){
				continue;
			}
			if(c == commentChar || c == '}'){
				break;
			}
			
			if(!isValue){
				if(c == '\'' && !isQuoted){
					isQuoted = true;
				}
				else if(c != '{' && c != '\'' && (isQuoted || !blank)){
					name_buf.append(c);
				}
				else{
					// a blank outside quotes, the closing quote or '{' ends the name
					result.setName(name_buf.toString());
					name_buf.setLength(0);
					isValue = true;
				}
			}
			else{
				// NOTE(review): '[' ends the specification of ordinary attributes,
				// presumably to drop a trailing range annotation - confirm
				if(c == '[' && !isClass){
					break;
				}
				if(!blank && c != '{' && c != '\''){
					value_buf.append(c);
				}
			}
		}
		
		String spec = value_buf.toString();
		if(!isClass && isNumericType(spec)){
			result.setType(Attribute.NUMERIC);
		}
		else{
			result.setType(Attribute.NOMINAL);
			result.setNominalValues(spec, fieldDelimiter);
		}
		
		return result;
	}
	
	/** Tells whether a type specification names a numeric type, ignoring case. */
	private static boolean isNumericType(String spec){
		return spec.equalsIgnoreCase("numeric")
				|| spec.equalsIgnoreCase("real")
				|| spec.equalsIgnoreCase("integer");
	}
	
	/**
	 * Converts every data line into an instance and adds it to
	 * <code>instances</code>; blank and comment lines are skipped.
	 */
	private void readInstances(Vector line_buf){
		
		for(int v=0; v<line_buf.size(); v++){
			String line =(String)line_buf.elementAt(v);
			if(!isBlankOrComment(line)){
				instances.addInstance(readInstance(line));
			}
		}
	}
	
	/**
	 * Builds one instance from a delimited data row.
	 * <p>
	 * Quote characters are removed from each field and surrounding blanks are
	 * trimmed. A field equal to <code>?</code> is stored as
	 * <code>Instance.UNKNOWN_VALUE</code>; any other field is stored as is and,
	 * for nominal attributes, increments the occurrence count of that value.
	 *
	 * @param line one row of the data section
	 * @return the instance holding the row's values in field order
	 */
	private Instance readInstance(String line){
		
		Instance instance = new Instance();
		StringTokenizer strt = new StringTokenizer(line,fieldDelimiter);
		
		int index=0;
		while(strt.hasMoreTokens()){
			String raw = strt.nextToken();
			StringBuffer t_buf= new StringBuffer();
			for(int j=0; j<raw.length(); j++){
				char c = raw.charAt(j);
				if( c != '\'' && c != '"'){
					t_buf.append(c);
				}
			}
			// blanks around a field are not part of the value
			String token = t_buf.toString().trim();
			
			if(token.equals(ArffDatasetLoader.UNKNOWN_VALUE)){
				instance.addValue(Instance.UNKNOWN_VALUE);
			}
			else{
				instance.addValue(token);
				// count number of each nominal value
				if(attributeInfo.getAttribute(index).getType() == Attribute.NOMINAL){
					int t=attributeInfo.getAttribute(index).getNumOfNominalValue(token);
					t++;
					attributeInfo.getAttribute(index).setNumOfNominalValue(token,t);
				}
			}
			index++;
		}
		
		return instance;
	}
	
	/**
	 * Stores the largest and smallest known value of every numeric attribute,
	 * formatted with <code>Float.toString</code>. Unknown values are ignored;
	 * if an attribute has no known value the bounds stay at
	 * <code>-Infinity</code> (maximum) and <code>Infinity</code> (minimum).
	 */
	private void setMaxMinValues(){
		
		for(int i=0; i<attributeInfo.getNumAttributes(); i++){
			Attribute att = attributeInfo.getAttribute(i);
			if(att.getType() != Attribute.NUMERIC){
				continue;
			}
			
			// Double.MIN_VALUE is the smallest positive double, so it cannot
			// serve as the starting point of a maximum search
			double max=Double.NEGATIVE_INFINITY;
			double min=Double.POSITIVE_INFINITY;
			for(int j=0; j<instances.size(); j++){
				String value = instances.getInstance(j).getValueAt(i);
				if(value.equals(Instance.UNKNOWN_VALUE)){
					continue;
				}
				double t_value = Double.valueOf(value).doubleValue();
				// both bounds are tested independently: one value can be the
				// new maximum and the new minimum at the same time
				if(max < t_value){
					max = t_value;
				}
				if(min > t_value){
					min = t_value;
				}
			}
			att.setMaxValue(Float.toString((float)max));
			att.setMinValue(Float.toString((float)min));
		}
	}
	
	/**
	 * Returns the textual form of the attribute information.
	 */
	public String toString(){
		
		StringBuffer result_buf = new StringBuffer();
		result_buf.append(attributeInfo.toString());
		return result_buf.toString();
	}
	
	/**
	 * Loads the ARFF file named by the first argument and prints its
	 * attribute information; failures are reported as a stack trace.
	 */
	public static void main(String[] args){
		
		if(args.length < 1){
			System.err.println("Usage: java coin.dataset_loader.ArffDatasetLoader <file.arff>");
			return;
		}
		
		try{
			ArffDatasetLoader dset = new ArffDatasetLoader(args[0]);
			System.out.println(dset.toString());
		}
		catch(Exception e){
			e.printStackTrace();
		}
		
	}

}
