/***************************************************************************
  
  gbx_tokenize.c
  
  (c) Benoît Minisini <benoit.minisini@gambas-basic.org>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  MA 02110-1301, USA.
  
***************************************************************************/

#define __GBX_TOKENIZE_C

#include "gb_common.h"
#include "gb_error.h"
#include "gb_array.h"
#include "gbx_string.h"
#include "gbx_c_array.h"

/* Appends 'token' to the array stored in result->data.
   Only the pointer is stored in the slot returned by ARRAY_add();
   result->count is not updated here. */
static void add_token(CARRAY *result, char *token)
{
	char **slot = (char **)ARRAY_add((char ***)&result->data);

	*slot = token;
}

// Appends to _result the string returned by STRING_new(_token, _len).
#define add_token_len(_result, _token, _len) add_token(_result, STRING_new(_token, _len))

/*
  Splits the first 'len' bytes of 'str' into tokens and returns them in a
  newly created CLASS_StringArray object.

  At each position the following rules are tried in order; the first one
  that applies produces the token:

    1. A run of ' ' and '\t' characters. Emitted as one token only when
       'keep_space' is set, otherwise skipped.
    2. A '\n' character. Emitted as a "\n" token only when 'keep_space' is
       set, otherwise skipped.
    3. A number: a run of characters accepted by STRING_is_digit().
    4. An identifier: a character accepted by STRING_is_letter(), followed
       by characters that are either accepted by STRING_is_alnum() or found
       in the first 'ident_len' bytes of 'ident' ('ident' is not read when
       'ident_len' <= 0).
    5. A delimited string, when 'string_limit' is not NULL. Each non-empty
       entry of 'string_limit' is read as: first character = opening
       delimiter, second character (optional) = closing delimiter, which
       defaults to the opening one, third character (optional) = escape
       character. The character following an escape character is skipped.
       The token includes the delimiters; if no closing delimiter is found
       the token runs up to the end of the input.
    6. An operator, when 'operator' is not NULL. Entries are tried in array
       order and the first one matching the input at the current position
       wins. The array's own string is appended and STRING_ref() is called
       on it.
    7. Otherwise, a one-character token.

  Returns the result array; it is returned empty when 'len' is 0.
*/
CARRAY *STRING_tokenize(const char *str, int len, const char *ident, int ident_len, CARRAY *string_limit, CARRAY *operator, bool keep_space)
{
	CARRAY *result;
	int p, p2;
	char c;
	
	result = (CARRAY *)OBJECT_create(CLASS_StringArray, NULL, NULL, 0);
	if (len == 0)
		return result;
	
	p = 0;
	
	while (p < len)
	{
		c = str[p];
		
		// space token
		
		if (c == ' ' || c == '\t')
		{
			p2 = p + 1;
			while (p2 < len)
			{
				c = str[p2];
				if (c != ' ' && c != '\t')
					break;
				p2++;
			}
			if (keep_space)
				add_token_len(result, &str[p], p2 - p);
			
			p = p2;
			continue;
		}
		
		// newlines
		
		if (c == '\n')
		{
			if (keep_space)
				add_token_len(result, "\n", 1);
			p++;
			continue;
		}
		
		// number token
		
		if (STRING_is_digit(c))
		{
			p2 = p + 1;
			while (p2 < len)
			{
				c = str[p2];
				// Use the same predicate that started the token. Passing a plain
				// (possibly negative) char to isdigit() is undefined behaviour.
				if (!STRING_is_digit(c))
					break;
				p2++;
			}
			add_token_len(result, &str[p], p2 - p);
			
			p = p2;
			continue;
		}
		
		// identifier token
		
		if (STRING_is_letter(c))
		{
			p2 = p + 1;
			while (p2 < len)
			{
				c = str[p2];
				if (!STRING_is_alnum(c))
				{
					// Extra identifier characters are optional
					if (ident_len <= 0)
						break;
					
					if (!memchr(ident, c, ident_len))
						break;
				}
				p2++;
			}
			add_token_len(result, &str[p], p2 - p);
			p = p2;
			continue;
		}
		
		// string token
		
		if (string_limit)
		{
			int i;
			char *limit;
			char begin, end, quote;
			
			for (i = 0; i < string_limit->count; i++)
			{
				limit = ((char **)string_limit->data)[i];
				if (!limit)
					continue;
				
				begin = *limit++;
				if (begin == 0)
					continue;
				
				if (str[p] != begin)
					continue;
				
				end = *limit++;
				if (end == 0)
				{
					// Only one character specified: it both opens and closes
					end = begin;
					quote = 0;
				}
				else
				{
					// Third character, or 0 if the entry has only two
					quote = *limit;
				}
				
				p2 = p + 1;
				while (p2 < len)
				{
					c = str[p2++];
					// quote == 0 means "no escape character": a NUL byte in the
					// input must not be mistaken for an escape.
					if (quote && c == quote)
					{
						// Skip the escaped character, if any
						if (p2 < len)
							p2++;
					}
					else if (c == end)
						break;
				}
				add_token_len(result, &str[p], p2 - p);
				p = p2;
				
				goto __CONTINUE;
			}
		}
		
		// operator token
		
		if (operator)
		{
			int i;
			char *op;
			int op_len;
			
			for (i = 0; i < operator->count; i++)
			{
				op = ((char **)operator->data)[i];
				if (!op)
					continue;
				
				op_len = STRING_length(op);
				
				// A zero-length operator would match without advancing 'p'
				// and make the main loop spin forever.
				if (op_len <= 0)
					continue;
				
				if ((op_len <= (len - p)) && memcmp(&str[p], op, op_len) == 0)
				{
					// The operator string itself is shared with the result array
					add_token(result, op);
					STRING_ref(op);
					p += op_len;
					goto __CONTINUE;
				}
			}
		}
		
		// one character token
		
		add_token_len(result, &str[p], 1);
		p++;
		
	__CONTINUE:;
	}
	
	result->count = ARRAY_count(result->data);
	return result;
}
