Unicode support for strings

Add support for using Unicode strings in .scad files. Support iterating across them, accessing them via [], and searching. -------- Add GLib (to both the test build and the normal build -- with either installed or locally built development files). Add support for Unicode chars to the length and search builtin functions and to [] for strings. Added Unicode testing functions. Add GLib to the library info page.
author: Brody Kenrick <user.fake@server.userfake> 2013-12-05 06:56:54 (GMT)
committer: Brody Kenrick <user.fake@server.userfake> 2013-12-05 07:28:40 (GMT)
commit: 0717c67c9fa894ecb08dc5de281753a00922d1ee (patch)
tree: 77baf10b4244a189f1212f3affee08a82a999013 /src
parent: d3b82dcac0cbd6bb46c3236d1183f84b76b44748 (diff)
4 files changed, 60 insertions, 11 deletions
diff --git a/src/AboutDialog.html b/src/AboutDialog.html
index 99e7c3b..65a54d7 100644
--- a/src/AboutDialog.html
+++ b/src/AboutDialog.html
@@ -64,6 +64,7 @@ Please visit this link for a copy of the license: <a href="http://www.gnu.org/li
 <li><a href="http://www.stroustrup.com/C++.html">C++</a>, <a href="http://gcc.gnu.org/">GCC</a>, <a href="http://clang.llvm.org/">clang</a>
 <li><a href="http://www.python.org">python</a>
 <li><a href="http://nsis.sourceforge.net/Main_Page">Nullsoft installer</a>
+<li><a href="https://developer.gnome.org/glib/">GLib</a>
 </lu>
 </p>
 
diff --git a/src/PlatformUtils.cc b/src/PlatformUtils.cc
index b02b822..8b39f6d 100644
--- a/src/PlatformUtils.cc
+++ b/src/PlatformUtils.cc
@@ -1,6 +1,8 @@
 #include "PlatformUtils.h"
 #include "boosty.h"
 
+#include <glib.h>
+
 bool PlatformUtils::createLibraryPath()
 {
 	std::string path = PlatformUtils::libraryPath();
@@ -114,6 +116,7 @@ std::string PlatformUtils::info()
 	  << "\nOpenCSG version: " << OPENCSG_VERSION_STRING
 	  << "\nQt version: " << qtVersion
 	  << "\nMingW build: " << mingwstatus
+	  << "\nGLib version: "       << GLIB_MAJOR_VERSION << "." << GLIB_MINOR_VERSION << "." << GLIB_MICRO_VERSION
 	  << "\nOPENSCADPATH: " << getenv("OPENSCADPATH") << "\n"
 	;
 	return s.str();
diff --git a/src/func.cc b/src/func.cc
index 865a2b4..4587f72 100644
--- a/src/func.cc
+++ b/src/func.cc
@@ -45,6 +45,8 @@
 
 #include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_real.hpp>
+/*Unicode support for string lengths and array accesses*/
+#include <glib.h>
 
 #ifdef __WIN32__
 #include <process.h>
@@ -306,7 +308,11 @@ Value builtin_length(const Context *, const EvalContext *evalctx)
 {
 	if (evalctx->numArgs() == 1) {
 		if (evalctx->getArgValue(0).type() == Value::VECTOR) return Value(int(evalctx->getArgValue(0).toVector().size()));
-		if (evalctx->getArgValue(0).type() == Value::STRING) return Value(int(evalctx->getArgValue(0).toString().size()));
+		if (evalctx->getArgValue(0).type() == Value::STRING) {
+			//Unicode glyph count for the length -- rather than the string (num. of bytes) length.
+			std::string text = evalctx->getArgValue(0).toString();
+			return Value(int( g_utf8_strlen( text.c_str(), text.size() ) ));
+		}
 	}
 	return Value();
 }
@@ -380,10 +386,17 @@ Value builtin_lookup(const Context *, const EvalContext *evalctx)
   num_returns_per_match : int;
   index_col_num : int;
 
+ The search string and searched strings can be unicode strings.
  Examples:
   Index values return as list:
     search("a","abcdabcd");
-        - returns [0,4]
+        - returns [0]
+    search("Л","Л");  //A unicode string
+        - returns [0]
+    search("🂡aЛ","a🂡Л🂡a🂡Л🂡a",0);
+        - returns [[1,3,5,7],[0,4,8],[2,6]]
+    search("a","abcdabcd",0); //Search up to all matches
+        - returns [[0,4]]
     search("a","abcdabcd",1);
         - returns [0]
     search("e","abcdabcd",1);
@@ -433,16 +446,25 @@ Value builtin_search(const Context *, const EvalContext *evalctx)
 		}
 	} else if (findThis.type() == Value::STRING) {
 		unsigned int searchTableSize;
-		if (searchTable.type() == Value::STRING) searchTableSize = searchTable.toString().size();
-		else searchTableSize = searchTable.toVector().size();
-		for (size_t i = 0; i < findThis.toString().size(); i++) {
+		//Unicode glyph count for the length
+		unsigned int findThisSize =  g_utf8_strlen( findThis.toString().c_str(), findThis.toString().size() );
+		if (searchTable.type() == Value::STRING) {
+			searchTableSize = g_utf8_strlen( searchTable.toString().c_str(), searchTable.toString().size() );
+		} else {
+		    searchTableSize = searchTable.toVector().size();
+		}
+		for (size_t i = 0; i < findThisSize; i++) {
 		  unsigned int matchCount = 0;
 			Value::VectorType resultvec;
 		  for (size_t j = 0; j < searchTableSize; j++) {
-		    if ((searchTable.type() == Value::VECTOR && 
-						 findThis.toString()[i] == searchTable.toVector()[j].toVector()[index_col_num].toString()[0]) ||
-						(searchTable.type() == Value::STRING && 
-						 findThis.toString()[i] == searchTable.toString()[j])) {
+		    gchar* ptr_ft = g_utf8_offset_to_pointer(findThis.toString().c_str(), i);
+		    gchar* ptr_st = NULL;
+		    if(searchTable.type() == Value::VECTOR) {
+		        ptr_st = g_utf8_offset_to_pointer(searchTable.toVector()[j].toVector()[index_col_num].toString().c_str(), 0);
+		    } else if(searchTable.type() == Value::STRING){
+		    	ptr_st = g_utf8_offset_to_pointer(searchTable.toString().c_str(), j);
+		    }
+		    if( (ptr_ft) && (ptr_st) && (g_utf8_get_char(ptr_ft) == g_utf8_get_char(ptr_st)) ) {
 		      Value resultValue((double(j)));
 		      matchCount++;
 		      if (num_returns_per_match == 1) {
@@ -454,7 +476,14 @@ Value builtin_search(const Context *, const EvalContext *evalctx)
 		      if (num_returns_per_match > 1 && matchCount >= num_returns_per_match) break;
 		    }
 		  }
-		  if (matchCount == 0) PRINTB("  WARNING: search term not found: \"%s\"", findThis.toString()[i]);
+		  if (matchCount == 0) {
+			  gchar* ptr_ft = g_utf8_offset_to_pointer(findThis.toString().c_str(), i);
+			  gchar utf8_of_cp[6] = ""; //A buffer for a single unicode character to be copied into
+			  if(ptr_ft) {
+			      g_utf8_strncpy( utf8_of_cp, ptr_ft, 1 );
+		      }
+			  PRINTB("  WARNING: search term not found: \"%s\"", utf8_of_cp );
+		  }
 		  if (num_returns_per_match == 0 || num_returns_per_match > 1) {
 				returnvec.push_back(Value(resultvec));
 			}
diff --git a/src/value.cc b/src/value.cc
index 5afb650..c8a88c6 100644
--- a/src/value.cc
+++ b/src/value.cc
@@ -36,6 +36,8 @@
 #include <boost/format.hpp>
 #include "boost-utils.h"
 #include "boosty.h"
+/*Unicode support for string lengths and array accesses*/
+#include <glib.h>
 
 std::ostream &operator<<(std::ostream &stream, const Filename &filename)
 {
@@ -579,14 +581,28 @@ Value Value::operator-() const
   }
 */
 
+/*
+ * bracket operation [] detecting multi-byte unicode.
+ * If the string is multi-byte unicode then the index will offset to the character (2 or 4 byte) and not to the byte.
+ * A 'normal' string with byte chars are a subset of unicode and still work.
+ */
 class bracket_visitor : public boost::static_visitor<Value>
 {
 public:
   Value operator()(const std::string &str, const double &idx) const {
     int i = int(idx);
     Value v;
+    //Check that the index is positive and less than the size in bytes
     if ((i >= 0) && (i < (int)str.size())) {
-      v = Value(str[int(idx)]);
+	  //Ensure character (not byte) index is inside the character/glyph array
+	  if( (unsigned) i < g_utf8_strlen( str.c_str(), str.size() ) )	{
+		  gchar utf8_of_cp[6] = ""; //A buffer for a single unicode character to be copied into
+		  gchar* ptr = g_utf8_offset_to_pointer(str.c_str(), i);
+		  if(ptr) {
+		    g_utf8_strncpy(utf8_of_cp, ptr, 1);
+		  }
+		  v = std::string(utf8_of_cp);
+	  }
       //      std::cout << "bracket_visitor: " <<  v << "\n";
     }
     return v;
author	Brody Kenrick <user.fake@server.userfake>	2013-12-05 06:56:54 (GMT)
committer	Brody Kenrick <user.fake@server.userfake>	2013-12-05 07:28:40 (GMT)
commit	0717c67c9fa894ecb08dc5de281753a00922d1ee (patch)
tree	77baf10b4244a189f1212f3affee08a82a999013 /src
parent	d3b82dcac0cbd6bb46c3236d1183f84b76b44748 (diff)