diff options
author | Brody Kenrick <user.fake@server.userfake> | 2013-12-05 06:56:54 (GMT) |
---|---|---|
committer | Brody Kenrick <user.fake@server.userfake> | 2013-12-05 07:28:40 (GMT) |
commit | 0717c67c9fa894ecb08dc5de281753a00922d1ee (patch) | |
tree | 77baf10b4244a189f1212f3affee08a82a999013 /src | |
parent | d3b82dcac0cbd6bb46c3236d1183f84b76b44748 (diff) |
Unicode support for strings
Add support for using unicode strings in .scad files. Support iterating
across them/accessing them via [] and searching.
--------
Add GLIB to both the test build and the normal build (using either
installed or locally built development files).
Add support for unicode chars to length and search builtin functions and
[] for strings.
Added unicode testing functions.
Add GLIB to library info page.
Diffstat (limited to 'src')
-rw-r--r-- | src/AboutDialog.html | 1 | ||||
-rw-r--r-- | src/PlatformUtils.cc | 3 | ||||
-rw-r--r-- | src/func.cc | 49 | ||||
-rw-r--r-- | src/value.cc | 18 |
4 files changed, 60 insertions, 11 deletions
diff --git a/src/AboutDialog.html b/src/AboutDialog.html index 99e7c3b..65a54d7 100644 --- a/src/AboutDialog.html +++ b/src/AboutDialog.html @@ -64,6 +64,7 @@ Please visit this link for a copy of the license: <a href="http://www.gnu.org/li <li><a href="http://www.stroustrup.com/C++.html">C++</a>, <a href="http://gcc.gnu.org/">GCC</a>, <a href="http://clang.llvm.org/">clang</a> <li><a href="http://www.python.org">python</a> <li><a href="http://nsis.sourceforge.net/Main_Page">Nullsoft installer</a> +<li><a href="https://developer.gnome.org/glib/">GLib</a> </lu> </p> diff --git a/src/PlatformUtils.cc b/src/PlatformUtils.cc index b02b822..8b39f6d 100644 --- a/src/PlatformUtils.cc +++ b/src/PlatformUtils.cc @@ -1,6 +1,8 @@ #include "PlatformUtils.h" #include "boosty.h" +#include <glib.h> + bool PlatformUtils::createLibraryPath() { std::string path = PlatformUtils::libraryPath(); @@ -114,6 +116,7 @@ std::string PlatformUtils::info() << "\nOpenCSG version: " << OPENCSG_VERSION_STRING << "\nQt version: " << qtVersion << "\nMingW build: " << mingwstatus + << "\nGLib version: " << GLIB_MAJOR_VERSION << "." << GLIB_MINOR_VERSION << "." 
<< GLIB_MICRO_VERSION << "\nOPENSCADPATH: " << getenv("OPENSCADPATH") << "\n" ; return s.str(); diff --git a/src/func.cc b/src/func.cc index 865a2b4..4587f72 100644 --- a/src/func.cc +++ b/src/func.cc @@ -45,6 +45,8 @@ #include <boost/random/mersenne_twister.hpp> #include <boost/random/uniform_real.hpp> +/*Unicode support for string lengths and array accesses*/ +#include <glib.h> #ifdef __WIN32__ #include <process.h> @@ -306,7 +308,11 @@ Value builtin_length(const Context *, const EvalContext *evalctx) { if (evalctx->numArgs() == 1) { if (evalctx->getArgValue(0).type() == Value::VECTOR) return Value(int(evalctx->getArgValue(0).toVector().size())); - if (evalctx->getArgValue(0).type() == Value::STRING) return Value(int(evalctx->getArgValue(0).toString().size())); + if (evalctx->getArgValue(0).type() == Value::STRING) { + //Unicode glyph count for the length -- rather than the string (num. of bytes) length. + std::string text = evalctx->getArgValue(0).toString(); + return Value(int( g_utf8_strlen( text.c_str(), text.size() ) )); + } } return Value(); } @@ -380,10 +386,17 @@ Value builtin_lookup(const Context *, const EvalContext *evalctx) num_returns_per_match : int; index_col_num : int; + The search string and searched strings can be unicode strings. 
Examples: Index values return as list: search("a","abcdabcd"); - - returns [0,4] + - returns [0] + search("Л","Л"); //A unicode string + - returns [0] + search("🂡aЛ","a🂡Л🂡a🂡Л🂡a",0); + - returns [[1,3,5,7],[0,4,8],[2,6]] + search("a","abcdabcd",0); //Search up to all matches + - returns [[0,4]] search("a","abcdabcd",1); - returns [0] search("e","abcdabcd",1); @@ -433,16 +446,25 @@ Value builtin_search(const Context *, const EvalContext *evalctx) } } else if (findThis.type() == Value::STRING) { unsigned int searchTableSize; - if (searchTable.type() == Value::STRING) searchTableSize = searchTable.toString().size(); - else searchTableSize = searchTable.toVector().size(); - for (size_t i = 0; i < findThis.toString().size(); i++) { + //Unicode glyph count for the length + unsigned int findThisSize = g_utf8_strlen( findThis.toString().c_str(), findThis.toString().size() ); + if (searchTable.type() == Value::STRING) { + searchTableSize = g_utf8_strlen( searchTable.toString().c_str(), searchTable.toString().size() ); + } else { + searchTableSize = searchTable.toVector().size(); + } + for (size_t i = 0; i < findThisSize; i++) { unsigned int matchCount = 0; Value::VectorType resultvec; for (size_t j = 0; j < searchTableSize; j++) { - if ((searchTable.type() == Value::VECTOR && - findThis.toString()[i] == searchTable.toVector()[j].toVector()[index_col_num].toString()[0]) || - (searchTable.type() == Value::STRING && - findThis.toString()[i] == searchTable.toString()[j])) { + gchar* ptr_ft = g_utf8_offset_to_pointer(findThis.toString().c_str(), i); + gchar* ptr_st = NULL; + if(searchTable.type() == Value::VECTOR) { + ptr_st = g_utf8_offset_to_pointer(searchTable.toVector()[j].toVector()[index_col_num].toString().c_str(), 0); + } else if(searchTable.type() == Value::STRING){ + ptr_st = g_utf8_offset_to_pointer(searchTable.toString().c_str(), j); + } + if( (ptr_ft) && (ptr_st) && (g_utf8_get_char(ptr_ft) == g_utf8_get_char(ptr_st)) ) { Value resultValue((double(j))); matchCount++; 
if (num_returns_per_match == 1) { @@ -454,7 +476,14 @@ Value builtin_search(const Context *, const EvalContext *evalctx) if (num_returns_per_match > 1 && matchCount >= num_returns_per_match) break; } } - if (matchCount == 0) PRINTB(" WARNING: search term not found: \"%s\"", findThis.toString()[i]); + if (matchCount == 0) { + gchar* ptr_ft = g_utf8_offset_to_pointer(findThis.toString().c_str(), i); + gchar utf8_of_cp[6] = ""; //A buffer for a single unicode character to be copied into + if(ptr_ft) { + g_utf8_strncpy( utf8_of_cp, ptr_ft, 1 ); + } + PRINTB(" WARNING: search term not found: \"%s\"", utf8_of_cp ); + } if (num_returns_per_match == 0 || num_returns_per_match > 1) { returnvec.push_back(Value(resultvec)); } diff --git a/src/value.cc b/src/value.cc index 5afb650..c8a88c6 100644 --- a/src/value.cc +++ b/src/value.cc @@ -36,6 +36,8 @@ #include <boost/format.hpp> #include "boost-utils.h" #include "boosty.h" +/*Unicode support for string lengths and array accesses*/ +#include <glib.h> std::ostream &operator<<(std::ostream &stream, const Filename &filename) { @@ -579,14 +581,28 @@ Value Value::operator-() const } */ +/* + * bracket operation [] detecting multi-byte unicode. + * If the string is multi-byte unicode then the index will offset to the character (2 or 4 byte) and not to the byte. + * A 'normal' string with byte chars are a subset of unicode and still work. 
+ */ class bracket_visitor : public boost::static_visitor<Value> { public: Value operator()(const std::string &str, const double &idx) const { int i = int(idx); Value v; + //Check that the index is positive and less than the size in bytes if ((i >= 0) && (i < (int)str.size())) { - v = Value(str[int(idx)]); + //Ensure character (not byte) index is inside the character/glyph array + if( (unsigned) i < g_utf8_strlen( str.c_str(), str.size() ) ) { + gchar utf8_of_cp[6] = ""; //A buffer for a single unicode character to be copied into + gchar* ptr = g_utf8_offset_to_pointer(str.c_str(), i); + if(ptr) { + g_utf8_strncpy(utf8_of_cp, ptr, 1); + } + v = std::string(utf8_of_cp); + } // std::cout << "bracket_visitor: " << v << "\n"; } return v; |