package org.gcube.socialnetworking.tokenizer;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Splits a string into {@link Token}s, using every single whitespace
 * character (regular expression {@code \s}) as a delimiter.
 * <p>
 * Each delimiter match closes one token, so a run of consecutive whitespace
 * characters (or a leading whitespace character) produces tokens whose text
 * is the empty string. Every token is built from the token text, the
 * delimiter that followed it, and the start/end offsets of the token text
 * inside the original string. Text remaining after the last delimiter is
 * emitted as a final token with an empty delimiter.
 * <p>
 * The token list is computed on the first call to {@link #getTokens()} and
 * cached afterwards.
 */
public class StringTokenizer {

	/**
	 * Delimiter pattern: exactly one whitespace character. Compiled once and
	 * shared by all instances instead of being recompiled per constructor call.
	 */
	private static final Pattern DELIMITER_PATTERN = Pattern.compile("\\s");

	/** The string being tokenized; offsets stored in tokens refer to it. */
	private final String originalString;

	/** Matcher over {@link #originalString}; its state is advanced by {@link #getTokens()}. */
	private final Matcher matcher;

	/** Cached result of {@link #getTokens()}; {@code null} until first computed. */
	private List<Token> tokens;

	/**
	 * Creates a tokenizer for the given string.
	 *
	 * @param string the string to tokenize
	 * @throws NullPointerException if {@code string} is {@code null}
	 */
	public StringTokenizer(String string) {
		if(string==null) {
			// Fail with a clear message instead of an anonymous NPE raised inside the regex engine.
			throw new NullPointerException("string must not be null");
		}
		this.originalString = string;
		this.matcher = DELIMITER_PATTERN.matcher(originalString);
	}

	/**
	 * Builds the token that ends at the delimiter currently matched by the
	 * internal matcher.
	 *
	 * @param tokenStart offset in the original string where the token text begins
	 * @return a token made of the text between {@code tokenStart} and the start
	 *         of the current delimiter match, the matched delimiter itself, and
	 *         the start/end offsets of the token text
	 * @throws IllegalStateException if the matcher has no current match, i.e.
	 *         no successful {@code find()} precedes this call
	 */
	protected Token getToken(int tokenStart) {
		// The token text ends exactly where the delimiter match begins.
		int tokenEnd = matcher.start();
		int delimiterEnd = matcher.end();
		String tokenString = originalString.substring(tokenStart, tokenEnd);
		String delimiter = originalString.substring(tokenEnd, delimiterEnd);
		return new Token(tokenString, delimiter, tokenStart, tokenEnd);
	}

	/**
	 * Returns the tokens of the original string, computing them on first use.
	 * <p>
	 * The returned list is the cached instance itself, not a copy.
	 *
	 * @return the list of tokens in order of appearance; empty for an empty string
	 */
	public List<Token> getTokens() {
		if(tokens==null) {
			tokens = new ArrayList<>();
			int tokenStart = 0;
			// One token per delimiter match; the next token starts right after the delimiter.
			while(matcher.find()) {
				tokens.add(getToken(tokenStart));
				tokenStart = matcher.end();
			}
			// Text left after the last delimiter becomes a final token with no delimiter.
			int length = originalString.length();
			if(tokenStart!=length){
				String tokenString = originalString.substring(tokenStart, length);
				tokens.add(new Token(tokenString, "", tokenStart, length));
			}
		}
		return tokens;
	}

}
