#include "SS_Tokenizer.hpp"


// File-local helpers used by sTokenize; their definitions follow it below.
static int  CharacterType(char c);              // class of c; 0 means whitespace, -1 means unclassified
static bool SingleCharacterToken(char c);       // true if c alone forms a token
static bool DoubleCharacterToken(const char* c); // true if c[0] and c[1] together form a token
static bool IsNumber(char c);                   // true if c is a digit '0'..'9'


////////////////////////////////////////////////////////////////////////////////

// Splits the script source in `text` into tokens and stores them in `tokens`.
//
// Any previous contents of `tokens` are discarded.  Whitespace, `//` line
// comments and `/* */` block comments are skipped.  Each token is stored as
// sToken(token text, offset of its first character from `text`, zero-based
// number of the line it starts on).  Adjacent string literals are merged into
// a single token that keeps the location and line of the first literal, and
// an END_TOKENS_MARKER token (location -1, line 0) is appended last.
//
// A string literal that is cut off by the end of the text is kept as a token
// without its closing quotation mark.
//
// Throws sScriptException if a block comment is never closed or if a string
// literal contains an end-of-line character.
void sTokenize(sVector<sToken>& tokens, const char* text)
{
  tokens.clear();

  // zero-based number of the line that p is currently on
  int line = 0;

  const char* p = text;
  while (*p)
  {
    // skip whitespace; newlines are counted as they are passed so that every
    // token is stamped with the line it actually starts on
    while (*p && CharacterType(*p) == 0)
    {
      if (*p == '\n')
        line++;
      p++;
    }

    // if it's //, go until end of line (the newline itself is left for the
    // whitespace skipper above, which counts it)
    if (p[0] == '/' && p[1] == '/')
    {
      while (*p && *p != '\n')
        p++;
      continue;
    }

    // if it's a /*, go until */
    if (p[0] == '/' && p[1] == '*')
    {
      p += 2; // skip /*
      while (p[0] != '*' || p[1] != '/')
      {
        // test for the end of the text BEFORE advancing, otherwise a comment
        // opened right at the end of the input walks past the terminator
        if (*p == 0)
          throw sScriptException("/* without matching */");

        // block comments may span lines, keep the line counter in step
        if (*p == '\n')
          line++;
        p++;
      }
      p += 2; // skip */
      continue;
    }

    // process token
    if (*p)
    {
      sString token;
      int location = static_cast<int>(p - text);

      if (*(p + 1) && DoubleCharacterToken(p))
      {
        // double-character token
        token = *p++;
        token += *p++;
      }
      else if (SingleCharacterToken(*p))
      {
        // single-character token
        token = *p++;
      }
      else if (*p == '"')
      {
        // string literal: go until the next quotation marks
        token += *p++;
        while (*p && *p != '"')
        {
          // can't have end-of-line characters in string literals
          if (*p == '\n' || *p == '\r')
            throw sScriptException("End of line not allowed in string literal", sToken(token, location, line));

          token += *p++;
        }

        // eat the closing quotation marks, unless the text ended first
        if (*p)
          token += *p++;
      }
      else if (IsNumber(*p) || *p == '.')
      {
        // if it's a digit, grab all digits and decimals
        while (IsNumber(*p) || *p == '.')
          token += *p++;
      }
      else
      {
        // go until character type changes
        int chartype = CharacterType(*p);
        while (*p && CharacterType(*p) == chartype)
          token += *p++;
      }

      // add the token to the list
      tokens.push_back(sToken(token, location, line));
    }
  }

  // combine all adjacent string tokens in one pass: `kept` is the number of
  // tokens retained so far, and tokens[kept - 1] is the most recent one
  int kept = 0;
  for (int i = 0; i < static_cast<int>(tokens.size()); i++)
  {
    if (kept > 0 && tokens[kept - 1].t[0] == '"' && tokens[i].t[0] == '"')
    {
      const char* first  = tokens[kept - 1].t.c_str();
      const char* second = tokens[i].t.c_str();

      // combine the tokens, removing the end quotes from the first and the
      // beginning quotes from the second.  The result is appended character
      // by character so its size never has to be computed up front; the
      // second literal can be a lone quotation mark when the text ends inside
      // a string.
      sString merged;
      const int first_length = static_cast<int>(strlen(first)) - 1;
      for (int k = 0; k < first_length; k++)
        merged += first[k];
      for (const char* s = second + 1; *s; s++)
        merged += *s;

      tokens[kept - 1].t = merged;
    }
    else
    {
      // keep this token, moving it down over any gap left by merged literals
      if (kept != i)
        tokens[kept] = tokens[i];
      kept++;
    }
  }

  // drop the slots freed by merging
  while (static_cast<int>(tokens.size()) > kept)
    tokens.pop_back();

  // put the end marker on
  tokens.push_back(sToken(END_TOKENS_MARKER, -1, 0));
}

////////////////////////////////////////////////////////////////////////////////

// Maps a character to its class number.
//   0       whitespace (space, tab, carriage return, newline)
//   1       letters, digits and underscore
//   2..13   one distinct value each for ; { } [ ] ( ) + - * / =
//   -1      anything else
int CharacterType(char c)
{
  switch (c)
  {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      return 0;

    case '_': return 1;
    case ';': return 2;
    case '{': return 3;
    case '}': return 4;
    case '[': return 5;
    case ']': return 6;
    case '(': return 7;
    case ')': return 8;
    case '+': return 9;
    case '-': return 10;
    case '*': return 11;
    case '/': return 12;
    case '=': return 13;

    default:
      break;
  }

  // the remaining members of class 1 are contiguous ranges
  const bool lowercase = ('a' <= c && c <= 'z');
  const bool uppercase = ('A' <= c && c <= 'Z');
  const bool digit     = ('0' <= c && c <= '9');
  if (lowercase || uppercase || digit)
    return 1;

  return -1;
}

////////////////////////////////////////////////////////////////////////////////

// Reports whether c on its own forms a complete token: a brace, parenthesis,
// bracket, one of + - * /, a semicolon, a period or a comma.
bool SingleCharacterToken(char c)
{
  // deliberately a plain array with no terminator, so '\0' never matches
  static const char singles[] = {
    '{', '}',
    '(', ')',
    '[', ']',
    '+', '-',
    '*', '/',
    ';', '.', ','
  };
  const int count = static_cast<int>(sizeof(singles) / sizeof(singles[0]));

  for (int i = 0; i < count; i++)
  {
    if (singles[i] == c)
      return true;
  }
  return false;
}

////////////////////////////////////////////////////////////////////////////////

// Reports whether the two characters c[0] and c[1] together form a token:
// one of += -= *= /= == != <= >= ++ --.  Both characters are read, so c must
// point at no fewer than two readable characters.
bool DoubleCharacterToken(const char* c)
{
  const char first  = c[0];
  const char second = c[1];

  // every operator of the form "x="
  if (second == '=')
  {
    switch (first)
    {
      case '+': case '-':
      case '*': case '/':
      case '=': case '!':
      case '<': case '>':
        return true;
    }
    return false;
  }

  // these are only included so common errors will give logical messages
  return (first == '+' || first == '-') && second == first;
}

////////////////////////////////////////////////////////////////////////////////

// Reports whether c is one of the decimal digits '0' through '9'.
bool IsNumber(char c)
{
  if (c < '0')
    return false;
  return c <= '9';
}

////////////////////////////////////////////////////////////////////////////////
