root/clib/wstring.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. word_string
  2. word_string
  3. word_string
  4. word_string
  5. empty
  6. show
  7. trim
  8. ucase
  9. lcase
  10. words
  11. indxword
  12. wordlen
  13. word
  14. substr
  15. left
  16. right
  17. command_words
  18. command_indxword
  19. command_wordlen
  20. command_word
  21. find
  22. find_char
  23. command_worddel
  24. worddel
  25. wordput
  26. num
  27. parse_extend
  28. str_replace

/* Word string class implementation
   Rick Smereka, Copyright (C) 2000-2002.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, get a copy via the Internet at
   http://gnu.org/copyleft/gpl.html or write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston,
   MA 02111-1307 USA

   You can contact the author via email at rsmereka@future-lab.com.

   Original version, Oct/2000, Rick Smereka

   Added 'using namespace std'. Dec/2002, Rick Smereka */

#include "stdhead.h"
#include "wstring.h"

using namespace std;

word_string::word_string(char *text)
{
   /* Build a word_string from a C string. A null pointer and a
      zero-length string both produce an empty word_string. */

   const bool has_text = (text != (char *)NULL) && (strlen(text) != 0);

   if (has_text)
      ws_contents = text;
   else
      ws_contents = "";
}

word_string::word_string(word_string& o_string)
{
   // Copy constructor taking a modifiable source.

   // an empty source produces an empty copy
   if (!o_string.empty())
      ws_contents = o_string.ws_contents;
   else
      ws_contents = "";
}

word_string::word_string(const word_string& o_string)
{
   /* Copy constructor taking a const source.

      The contents are copied directly. Copying an empty string
      already yields an empty string, so no separate check is
      needed, and the source is never asked to run a non-const
      member function ('empty' is not const-qualified in this
      file). */

   ws_contents = o_string.ws_contents;
}

word_string::word_string(char ch, int cnt)
{
   /* Create a string holding 'cnt' copies of the character 'ch'.
      A zero or negative count produces an empty string. */

   if (cnt <= 0)
      ws_contents = "";
   else
      // fill in one step instead of appending one character at a time
      ws_contents.assign(static_cast<string::size_type>(cnt), ch);
}

word_string operator + (word_string str1, word_string str2)
{
   /* Concatenate. Friend method. Returns a new word_string
      holding 'str1' followed by 'str2'. Appending an empty
      operand leaves the other side unchanged, so empty operands
      need no special handling. */

   word_string joined;

   joined.ws_contents = str1.ws_contents;
   joined.ws_contents += str2.ws_contents;
   return(joined);
}

word_string operator + (word_string str1, string str2)
{
   /* Concatenate. Friend method. Returns a new word_string
      holding 'str1' followed by the standard string 'str2'.
      An empty operand contributes nothing to the result. */

   word_string joined;

   joined.ws_contents = str1.ws_contents;
   joined.ws_contents += str2;
   return(joined);
}

word_string operator + (string str1, word_string str2)
{
   /* Concatenate. Friend method. Returns a new word_string
      holding the standard string 'str1' followed by 'str2'.
      An empty operand contributes nothing to the result. */

   word_string joined;

   joined.ws_contents = str1;
   joined.ws_contents += str2.ws_contents;
   return(joined);
}

word_string operator + (word_string str1, char *str2)
{
   /* Concatenate. Friend method. Returns a new word_string
      holding 'str1' followed by the C string 'str2'.

      A null 'str2' is treated as an empty string, the same way
      the 'char *' constructor treats a null pointer, instead of
      being handed to 'strlen'. */

   word_string result;

   result.ws_contents = str1.ws_contents;

   if (str2 != (char *)NULL)
      result.ws_contents += str2;

   return(result);
}

word_string operator + (char *str1, word_string str2)
{
   /* Concatenate. Friend method. Returns a new word_string
      holding the C string 'str1' followed by 'str2'.

      A null 'str1' is treated as an empty string, the same way
      the 'char *' constructor treats a null pointer, instead of
      being handed to 'strlen'. */

   word_string result;

   if (str1 != (char *)NULL)
      result.ws_contents = str1;

   result.ws_contents += str2.ws_contents;
   return(result);
}

word_string operator + (word_string str1, char ch)
{
   /* Concatenate. Friend method. Returns a new word_string
      holding 'str1' with the single character 'ch' appended. */

   word_string joined;

   joined.ws_contents = str1.ws_contents;
   joined.ws_contents += ch;
   return(joined);
}

word_string operator + (char ch, word_string str1)
{
   /* Concatenate. Friend method. Returns a new word_string
      holding the single character 'ch' followed by 'str1'. */

   word_string joined;

   joined.ws_contents = ch;
   joined.ws_contents += str1.ws_contents;
   return(joined);
}

bool word_string::operator == (char ch)
{
   /* Equality test against a single character. Only the first
      character of 'ws_contents' takes part in the comparison. */

   return((ws_contents[0] == ch) ? TRUE : FALSE);
}

bool word_string::operator != (char ch)
{
   /* Inequality test against a single character. Only the first
      character of 'ws_contents' takes part in the comparison. */

   return((ws_contents[0] != ch) ? TRUE : FALSE);
}

char word_string::operator [] (int pos)
{
   /* Fetch the character at index 'pos'. An index outside the
      string returns 'EOS' instead of touching the string. */

   if (pos >= 0 && pos < ws_contents.length())
      return(ws_contents[pos]);

   return(EOS);
}

bool word_string::empty(void)
{
   // Report TRUE when 'ws_contents' holds no characters.

   return((ws_contents.length() == 0) ? TRUE : FALSE);
}

void word_string::show(void)
{
   // Print string.

   cout << '\'' << ws_contents << '\'' << ",len=" << ws_contents.length() << "\n";
}

word_string word_string::trim(void)
{
   /* Trim a string: remove all spaces from the beginning and the
      end, and collapse every run of spaces between words to a
      single space. Only the space character ' ' is treated as
      blank. The trimmed copy is returned; '*this' is not
      modified. An empty or all-blank string gives an empty
      result.

      Every loop tests the index against the length before
      reading the character, so the string is never indexed at
      or past its end. */

   word_string result;
   int i, len;

   len = length();

   // skip leading blanks
   i = 0;

   while(i < len && ws_contents[i] == ' ')
      i++;

   // empty or all-blank string
   if (i >= len)
      return(result);

   while(i < len)
      {
      // copy one word
      while(i < len && ws_contents[i] != ' ')
         {
         result += ws_contents[i];
         i++;
         }

      // skip the run of blanks that follows it
      while(i < len && ws_contents[i] == ' ')
         i++;

      // emit a single separator only when another word follows
      if (i < len)
         result += ' ';
      }

   return(result);
}

word_string word_string::ucase(void)
{
   /* Return an uppercase copy of the string; '*this' is not
      modified. An empty string gives an empty result. */

   word_string result;
   int i, len;

   len = length();

   /* 'toupper' requires a value representable as unsigned char,
      so each character is converted first (a plain 'char' may be
      negative). The 'int' result is narrowed back to 'char'
      before being appended. */

   for(i = 0; i < len; i++)
      result += static_cast<char>(
         toupper(static_cast<unsigned char>(ws_contents[i])));

   return(result);
}

word_string word_string::lcase(void)
{
   /* Return a lowercase copy of the string; '*this' is not
      modified. An empty string gives an empty result. */

   word_string result;
   int i, len;

   len = length();

   /* 'tolower' requires a value representable as unsigned char,
      so each character is converted first (a plain 'char' may be
      negative). The 'int' result is narrowed back to 'char'
      before being appended. */

   for(i = 0; i < len; i++)
      result += static_cast<char>(
         tolower(static_cast<unsigned char>(ws_contents[i])));

   return(result);
}

int word_string::words(char delim)
{
   // Count the number of 'delim' delimited words.

   word_string tmp;
   int i, len, nwords;

   len = length();

   if (!len)
      return(0);

   // trim string if delimiter is a space

   if (delim == ' ')
      {
      tmp = trim();
      len = tmp.length();

      if (!len)
         return(0);
      }
   else
      tmp.ws_contents = ws_contents;

   // count instances

   for(i = 0, nwords = 0; i < len; i++)
      if (tmp[i] == delim)
         nwords++;

   return(nwords + 1);
}

int word_string::indxword(int which, char delim)
{
   /* Return the index of word 'which' delimited by 'delim'.
      Function returns the index on success, -1 otherwise. */

   word_string tmp;
   int i, len, nwords, index;

   len = length();

   if (!len)
      return(-1);

   if (which <= 0 || which > words(delim))
      return(-1);

   // trim string if delimiter is a space

   if (delim == ' ')
      {
      tmp = trim();
      len = tmp.length();

      if (!len)
         return(-1);
      }
   else
      tmp.ws_contents = ws_contents;

   i = 0;
   nwords = 1;

   while(i <= len)
      {
      index = i;

      if (nwords == which)
         return(index);

      while(tmp[i] != delim && i < len)
         i++;

      nwords++;
      i++;
      }

   return(-1);
}

int word_string::wordlen(int which, char delim)
{
   /* Get the length of 'delim' delimited word 'which'.
      Functions returns length upon sucess, -1 otherwise.
      It is acceptable to have a word of zero length. */

   int nwords;
   int len;
   int wrdlen;

   if ((len = length()) == 0)
      return(-1);

   nwords = words(delim);

   if (!nwords)
      return(-1);

   if (which <= 0 || which > nwords)
      return(-1);

   if (which == nwords)
      wrdlen = len - indxword(which, delim);
   else
      wrdlen = indxword(which + 1, delim) - (indxword(which, delim) + 1);

   return(wrdlen);
}

bool word_string::word(word_string& outstr, int which, char delim)
{
   /* Extract word 'which' (1-based) and return it in 'outstr'.
      Each word is delimited by 'delim'. Returns TRUE on success,
      FALSE otherwise; 'outstr' is emptied on failure.

      With a space delimiter, 'indxword' reports positions in the
      trimmed text, so the word is taken from a trimmed copy.
      Taking it from the untrimmed string returned the wrong
      characters whenever there were leading or repeated blanks. */

   word_string squeezed;
   int len;
   int pos;

   len = length();
   outstr = "";

   if (!len)
      return(FALSE);

   if (which <= 0 || which > words(delim))
      return(FALSE);

   if (delim == ' ')
      {
      // position, length and extraction all use the same trimmed text
      squeezed = trim();
      pos = squeezed.indxword(which, delim);
      len = squeezed.wordlen(which, delim);
      outstr = squeezed.substr(pos, len);
      }
   else
      {
      pos = indxword(which, delim);
      len = wordlen(which, delim);
      outstr = substr(pos, len);
      }

   return(TRUE);
}

word_string word_string::substr(int pos, int len)
{
   /* Sub-string extract: up to 'len' characters starting at
      index 'pos'. An empty word_string is returned when 'pos' is
      negative, 'pos' is at or past the end of the string, or
      'len' is not positive.

      The end-of-string check keeps an out-of-range start from
      reaching 'std::string::substr', which throws in that case. */

   word_string result;

   if (pos < 0 || len <= 0 || pos >= length())
      return(result);

   result.ws_contents = ws_contents.substr(pos, len);
   return(result);
}

word_string word_string::left(int len)
{
   /* Return the leftmost 'len' characters. A non-positive 'len'
      gives an empty result; a 'len' at least as long as the
      string gives the whole string. */

   word_string head;

   if (len > 0)
      head.ws_contents = (len >= length()) ? ws_contents
                                           : ws_contents.substr(0, len);

   return(head);
}

word_string word_string::right(int len)
{
   /* Return the rightmost 'len' characters. A non-positive 'len'
      or an empty string gives an empty result; a 'len' longer
      than the string gives the whole string. */

   word_string tail;
   const int total = length();

   if (total == 0 || len <= 0)
      return(tail);

   if (len > total)
      tail.ws_contents = ws_contents;
   else
      tail.ws_contents = ws_contents.substr(total - len, len);

   return(tail);
}

int word_string::command_words(void)
{
   /* Count the number of 'command' words in a string. A
      command word can be either a space delimited word or
      multiple words delimited by matching quotes (single or
      double). Function returns the number of command words
      found, or zero for an empty string.

      The scan counts word boundaries in 'nwords' and returns
      that count plus one. The string is not trimmed first, so
      every space counts as a boundary. */

   char ch;
   char prev_ch;
   char delim;
   int len;
   int nwords;
   int pos;
   int found;

   len = length();

   if (!len)
      return(0);

   nwords = 0;
   pos = 0;

   // loop through all characters in string looking for delimiters

   do
      {
      ch = ws_contents[pos];

      switch(ch)
         {
         case ' ':
            // a space is an unconditional word

            nwords++;
            pos++;
            break;

         case '\'':
         case '\"':
            /* an opening quote directly after an ordinary character
               (not a space and not another quote) ends the word
               before it, so count that boundary */

            if (pos > 0)
               {
               prev_ch = ws_contents[pos - 1];

               if (prev_ch != ' ' && prev_ch != '\"' && prev_ch != '\'')
                  nwords++;
               }

            // remember which quote character opened the section
            delim = ch;
            found = FALSE;
            pos++;

            // find matching quote

            do
               {
               ch = ws_contents[pos];

               if (delim == ch)
                  {
                  /* step over the closing quote, and over a single
                     space if one follows it directly */

                  if (ws_contents[pos + 1] == ' ')
                     pos += 2;
                  else
                     pos++;

                  /* add another word as long as the quote is
                     not the last character */

                  if (pos < len)
                     nwords++;

                  found = TRUE;
                  }
               else
                  pos++;
               }
            while(!found && pos <= len);

            /* if no matching quote exists, the inner loop ends with
               'pos' past the end, which also ends the outer loop */

            break;

         default:
            // ordinary character, part of the current word
            pos++;
         };
      }
   while(pos <= len);

   return(nwords + 1);
}

int word_string::command_indxword(int which)
{
   /* Return the index of command word 'which' (1-based).
      Command words are either space delimited words or
      sections enclosed in matching quotes (single or double).
      Function returns the index on success, -1 otherwise.
      When the word starts with a quote, the returned index is
      that of the character after the quote. */

   int pos, len, nword, done;
   int found;
   char ch, delim, prev_ch;

   len = length();

   if (!len)
      return(-1);

   if (which <= 0 || which > command_words())
      return(-1);

   pos = 0;
   nword = 1;
   done = FALSE;

   // loop through all characters in string looking for delimiters

   do
      {
      /* handle first word requested; every later match sets
         'done' at the point where 'nword' is incremented, so
         this test only fires on the first pass, when 'which'
         is 1 */

      if (nword == which)
         {
         // skip an opening quote so the index lands on the text
         if (ws_contents[pos] == '\'' || ws_contents[pos] == '\"')
            pos++;

         done = TRUE;

         // jumps to the loop condition, which now ends the loop
         continue;
         }

      ch = ws_contents[pos];

      switch(ch)
         {
         case ' ':
            // a space is an unconditional word

            nword++;
            pos++;

            if (nword == which)
               done = TRUE;

            // move position if the next character is a quote

            if (done && pos < len)
               if (ws_contents[pos] == '\'' || ws_contents[pos] == '\"')
                  pos++;

            break;

         case '\'':
         case '\"':
            // if quote is to the right of a non-delimiter, add one word

            if (pos > 0)
               {
               prev_ch = ws_contents[pos - 1];

               if (prev_ch != ' ' && prev_ch != '\"' && prev_ch != '\'')
                  {
                  nword++;

                  /* requested word found here: stop with 'pos' still
                     at the quote character itself */

                  if (nword == which)
                     {
                     done = TRUE;
                     break;
                     }
                  }
               }

            // remember which quote character opened the section
            delim = ch;
            found = FALSE;
            pos++;

            // find matching quote

            do
               {
               ch = ws_contents[pos];

               if (delim == ch)
                  {
                  /* step over the closing quote, and over a single
                     space if one follows it directly */

                  if (ws_contents[pos + 1] == ' ')
                     pos += 2;
                  else
                     pos++;

                  // a new word starts here unless the string has ended

                  if (pos < len)
                     {
                     nword++;

                     if (nword == which)
                        {
                        done = TRUE;

                        // skip an opening quote on the requested word
                        if (ws_contents[pos] == '\'' || ws_contents[pos] == '"')
                           pos++;
                        }
                     }

                  found = TRUE;
                  }
               else
                  pos++;
               }
            while(!found && pos <= len);

            break;

         default:
            // ordinary character, part of the current word
            pos++;
            break;
         }
      }
   while(pos <= len && !done);

   if (done)
      return(pos);

   return(-1);
}

int word_string::command_wordlen(int which)
{
   /* Get and return the length of command word 'which'
      (1-based). Function returns the word length upon success
      (which may be zero), -1 otherwise.

      The word is taken to end at the next occurrence of the
      character found just before its start (a space when the
      word starts at index zero), or at the end of the string. */

   int indx, i, len, outlen;
   char delim;

   len = length();

   if (!len)
      return(-1);

   if (which <= 0 || which > command_words())
      return(-1);

   indx = command_indxword(which);

   /* 'command_indxword' reports failure as -1; do not use that
      (or any index past the end) to index the string */

   if (indx < 0 || indx > len)
      return(-1);

   // set delimiter

   if (indx == 0)
      delim = ' ';
   else
      delim = ws_contents[indx - 1];

   // loop looking for the next delimiter or end of the string

   outlen = 0;

   for(i = indx; i < len && ws_contents[i] != delim; i++)
      outlen++;

   return(outlen);
}

bool word_string::command_word(word_string& o_string, int which)
{
   /* Extract command word 'which' (1-based). The word is placed
      into 'o_string'. Function returns 'TRUE' upon success,
      'FALSE' otherwise; 'o_string' is left empty on failure. */

   int len, pos;

   o_string = "";

   if (which <= 0 || which > command_words())
      return(FALSE);

   pos = command_indxword(which);
   len = command_wordlen(which);

   /* both helpers report failure as -1; a negative or
      out-of-range start would make 'std::string::substr' throw */

   if (pos < 0 || len < 0 || pos > length())
      return(FALSE);

   o_string.ws_contents = ws_contents.substr(pos, len);
   return(TRUE);
}

int word_string::find(word_string& sub)
{
   /* Find the first occurrence of 'sub' within '*this'. Returns
      the index of the match if found, -1 otherwise. Scanning of
      '*this' stops at the first 'EOS' character. */

   int start, hay, needle;

   start = 0;

   while(ws_contents[start] != EOS)
      {
      // compare 'sub' against the text beginning at 'start'
      hay = start;
      needle = 0;

      while(sub[needle] != EOS && sub[needle] == ws_contents[hay])
         {
         hay++;
         needle++;
         }

      // all of 'sub' was consumed, so it matched here
      if (sub[needle] == EOS)
         return(start);

      start++;
      }

   return(-1);
}

int word_string::find_char(char findch)
{
   /* Locate the first instance of the character 'findch'.
      Returns its index if found, '-1' otherwise (including when
      the string is empty). */

   const int total = length();
   int at = 0;

   while(at < total)
      {
      if (ws_contents[at] == findch)
         return(at);

      at++;
      }

   return(-1);
}

bool word_string::command_worddel(int which)
{
   /* Delete the 'which' command word. Upon success, '*this->ws_contents'
      be loaded with the new string. Function returns 'TRUE' upon
      success, 'FALSE' otherwise. */

   word_string tmpout, tmpwrd;
   char quote;
   int nwords, nwords_out;
   int len;
   int pos;
   int i;

   len = length();

   if (!len || which <= 0)
      return(FALSE);

   nwords = command_words();
   nwords_out = 0;

   if (which > nwords)
      return(FALSE);

   // loop to copy all words except the one to delete

   for(i = 1; i <= nwords; i++)
      {
      if (i != which)
         {
         if (!command_word(tmpwrd, i))
            return(FALSE);

         /* if word contains multiple space delimited words,
            put the quotes back. */

         if (tmpwrd.words() > 1)
            {
            /* decide which quotes to use based on if there
               are any existing quotes */

            if (tmpwrd.find_char('\'') != -1)
               quote = '\"';
            else
               quote = '\'';

            tmpout += quote;
            tmpout += tmpwrd;
            tmpout += quote;
            }
         else
            tmpout += tmpwrd;

         nwords_out++;

         /* concatenate the output string with a delimiter
            unless we are on the last word to be output */

         if (nwords_out < (nwords - 1))
            tmpout += ' ';

         /* special logic to break if we are done before
            the natual end of the loop, this should occurr
            only if the last word is being deleted. */

         if (nwords_out == (nwords - 1))
            break;
         }
      }

   ws_contents = tmpout.ws_contents;
   return(TRUE);
}

bool word_string::worddel(int which, char delim)
{
   /* Delete the 'which' word delimited by 'delim'.
      Upon success, '*this->ws_contents' will be loaded with the new string.
      Function returns 'TRUE' upon success, 'FALSE' otherwise. */

   word_string tmpout, tmpwrd;
   int nwords, nwords_out;
   int len, i;

   len = length();

   if (!len || which <= 0)
      return(FALSE);

   nwords = words(delim);
   nwords_out = 0;

   if (which > nwords)
      return(FALSE);

   // loop to copy all words except the one to delete

   for(i = 1; i <= nwords; i++)
      {
      if (i != which)
         {
         if (!word(tmpwrd, i, delim))
            return(FALSE);

         tmpout += tmpwrd;
         nwords_out++;

         /* concatenate the output string with a delimiter
            unless we are on the last word to be output */

         if (nwords_out < (nwords - 1))
            tmpout += delim;

         /* special logic to break if we are done before
            the natual end of the loop, this should occurr
            only if the last word is being deleted. */

         if (nwords_out == (nwords - 1))
            break;
         }
      }

   ws_contents = tmpout.ws_contents;
   return(TRUE);
}

bool word_string::wordput(word_string& wdata, int which, char delim)
{
   /* Place the word 'wdata' into 'ws_contents' as word 'which'
      (1-based). If the word already exists it is replaced; if it
      does not exist, the string is first extended out to the
      required word. Each word is delimited by 'delim'. Function
      returns 'TRUE' upon success, 'FALSE' otherwise; on failure
      'ws_contents' is left unchanged.

      When 'which' is not the last word, the delimiter after it
      is always written back, even if everything after it is
      empty. This keeps a trailing empty word from being dropped.

      NOTE(review): with a space delimiter, 'indxword' and
      'wordlen' work from the trimmed text while the pieces are
      cut from the untrimmed copy; confirm callers only pass
      already-trimmed strings with that delimiter. */

   word_string new_str, before, after, tmp;
   int existing_field_len;
   int new_field_len;
   int nwords;
   int len;
   int indx;

   len = length();

   if (which <= 0)
      return(FALSE);

   nwords = words(delim);

   // work on a copy so '*this' is untouched if a later step fails

   if (len)
      new_str = ws_contents;

   // extend string out if required

   if (which > nwords)
      {
      if (!new_str.parse_extend(which, delim))
         return(FALSE);

      len = new_str.length();
      nwords = which;
      }

   if (!len)
      existing_field_len = 0;
   else
      existing_field_len = new_str.wordlen(which, delim);

   new_field_len = wdata.length();

   // if existing and new strings are length zero, nothing to do

   if (new_field_len == 0 && existing_field_len == 0)
      return(TRUE);

   // if strings are equal in length, place new data over top of old

   if (existing_field_len == new_field_len)
      {
      indx = new_str.indxword(which, delim);

      if (!new_str.str_replace(indx, wdata))
         return(FALSE);

      ws_contents = new_str.ws_contents;
      return(TRUE);
      }

   /* part one, 'before': everything up to the start of word
      'which', including the delimiter in front of it (empty for
      the first word) */

   if (which > 1)
      {
      indx = new_str.indxword(which, delim);
      before = new_str.substr(0, indx);
      }

   tmp = "";

   if (before.length())
      tmp = before;

   // part two, new word data

   tmp += wdata;

   /* part three, 'after': the delimiter plus everything from the
      start of the next word to the end. The delimiter is written
      whenever a following word exists, even an empty one. */

   if (which < nwords)
      {
      indx = new_str.indxword(which + 1, delim);
      after = new_str.substr(indx, len - indx);
      tmp += delim;

      if (after.length())
         tmp += after;
      }

   ws_contents = tmp.ws_contents;
   return(TRUE);
}

bool word_string::num(void)
{
   /* Evaluate a string to determine whether it is a valid number.
      Numbers are the digits 0-9. A minus sign is allowed in the
      first character only. Function returns 'TRUE' if the string
      is a valid number, 'FALSE' otherwise (including for an
      empty string).

      NOTE(review): a string consisting of only "-" is accepted;
      confirm whether callers rely on that before tightening. */

   int len, i;

   len = length();

   if (!len)
      return(FALSE);

   for(i = 0; i < len; i++)
      {
      if (ws_contents[i] == '-')
         {
         // a minus sign anywhere but the front is invalid
         if (i > 0)
            return(FALSE);
         }
      else
         /* 'isdigit' requires a value representable as unsigned
            char; a plain 'char' may be negative */
         if (!isdigit(static_cast<unsigned char>(ws_contents[i])))
            return(FALSE);
      }

   return(TRUE);
}

bool word_string::parse_extend(int which, char delim)
{
   /* Extend string data out to 'which' 'delim' delimited words
      by appending delimiters. You may not extend using the space
      delimiter because of the nature of the space and the 'trim'
      function. Function returns 'TRUE' upon success, 'FALSE'
      otherwise. Upon success, 'ws_contents' holds the extended
      string.

      An empty string is reported by 'words' as having zero
      words, but appending N delimiters to it yields N + 1 words.
      The extension therefore starts counting from one in that
      case, so the result has exactly 'which' words. */

   int nwords;
   int i;

   nwords = words(delim);

   /* if the existing string is empty and we are adding the
      first word, do not extend */

   if (!nwords && which == 1)
      return(TRUE);

   if (which <= 0 || delim == ' ')
      return(FALSE);

   // if word is already present, extend not necessary

   if (nwords >= which)
      return(TRUE);

   // extend string; an empty string already stands for one empty word

   for(i = (nwords ? nwords : 1); i < which; i++)
      ws_contents += delim;

   return(TRUE);
}

bool word_string::str_replace(int spos, word_string& wdata)
{
   /* Overwrite bytes in place. Starting at index 'spos', the
      characters of 'ws_contents' are replaced by the characters
      of 'wdata'; the number replaced equals the length of
      'wdata'. The replacement must fit inside the existing
      string. Returns 'TRUE' upon success, 'FALSE' otherwise
      (empty target, empty 'wdata', negative 'spos', or a
      replacement that would run past the end). */

   const int total = length();
   const int incoming = wdata.length();
   int offset;

   if (spos < 0 || total == 0 || incoming == 0)
      return(FALSE);

   if (spos + incoming > total)
      return(FALSE);

   for(offset = 0; offset < incoming; offset++)
      ws_contents[spos + offset] = wdata[offset];

   return(TRUE);
}

/* [<][>][^][v][top][bottom][index][help] */