[bitc-dev] Code for bitc_vector_string

Sam Mason sam at samason.me.uk
Mon Jul 3 13:36:40 EDT 2006


On Mon, Jul 03, 2006 at 06:34:08PM +0100, sam wrote:
> It's not very nice code

It may be a good idea if I actually include the code!


  Sam
-------------- next part --------------
diff -rN -u old-coyotos/src.coyotos/src/ccs/bitcc-bootstrap/libbitc/string.c new-coyotos/src.coyotos/src/ccs/bitcc-bootstrap/libbitc/string.c
--- old-coyotos/src.coyotos/src/ccs/bitcc-bootstrap/libbitc/string.c	2006-07-03 18:25:29.000000000 +0100
+++ new-coyotos/src.coyotos/src/ccs/bitcc-bootstrap/libbitc/string.c	2006-07-03 18:25:30.000000000 +0100
@@ -98,18 +98,96 @@
 }
 DEFCLOSURE(bitc_string_nth);
 
-#if 0
+/*
+ * Build a bitc_string_t from a vector of UCS-4 code points by
+ * encoding each element of vec->elem as UTF-8.
+ *
+ * Two passes are made over the same index range [0, vec->len): the
+ * first sums the encoded size so the byte buffer can be allocated
+ * exactly, the second writes the bytes.  Iterating upward with
+ * "i < len" also behaves for an empty vector and does not rely on
+ * bitc_word_t being signed.
+ *
+ * Values above 0x7fffffff match no branch and contribute no bytes in
+ * either pass.
+ *
+ * NOTE(review): tmp->length receives the code-point count rather than
+ * the UTF-8 byte count, and no NUL terminator is written -- confirm
+ * against the other users of bitc_string_t.
+ *
+ * NOTE(review): the header that stores the pointer s comes from
+ * GC_ALLOC_ATOMIC while the pointer-free byte buffer comes from
+ * GC_ALLOC; if ATOMIC means "not scanned for pointers" these look
+ * swapped -- confirm against the GC wrapper definitions.
+ */
 bitc_string_t *
-DEFUN(bitc_vector_string, arg0_bitc_vector_string vec)
+DEFUN(bitc_vector_string, arg_0_bitc_vector_string vec)
 {
-  size_t len = vec->strlen(s);
+  bitc_word_t   len  = vec->len;
+  bitc_char_t * ucs4 = vec->elem;
+  
+  bitc_word_t utf8len = 0;
+  /* Pass 1: number of UTF-8 bytes needed. */
+  for (bitc_word_t i = 0; i < len; i++) {
+    if (ucs4[i] <= 0x7f) {
+      utf8len += 1;
+    } else if (ucs4[i] <= 0x7ff) {
+      utf8len += 2;
+    } else if (ucs4[i] <= 0xffff) {
+      utf8len += 3;
+    } else if (ucs4[i] <= 0x1fffff) {
+      utf8len += 4;
+    } else if (ucs4[i] <= 0x3ffffff) {
+      utf8len += 5;
+    } else if (ucs4[i] <= 0x7fffffff) {
+      utf8len += 6;
+    }
+  }
+  char * s = GC_ALLOC(utf8len);
+  char * utf8 = s;
+  /* Pass 2: emit the bytes; must cover the same elements as pass 1. */
+  for (bitc_word_t i = 0; i < len; i++) {
+    if (ucs4[i] <= 0x7f) {
+      *utf8++ = ucs4[i];
+    }
+    else if (ucs4[i] <= 0x7ff) {
+      *utf8++ = 192u + (ucs4[i] / 64);
+      *utf8++ = 128u + (ucs4[i] % 64);
+    }
+    else if (ucs4[i] <= 0xffff) {
+      *utf8++ = 224u + (ucs4[i] / 4096);
+      *utf8++ = 128u + ((ucs4[i] / 64) % 64);
+      *utf8++ = 128u + (ucs4[i] % 64);
+    }
+    else if (ucs4[i] <= 0x1fffff) {
+      *utf8++ = 240u + (ucs4[i] / 262144);
+      *utf8++ = 128u + ((ucs4[i] / 4096) % 64);
+      *utf8++ = 128u + ((ucs4[i] / 64) % 64);
+      *utf8++ = 128u + (ucs4[i] % 64);
+    }
+    else if (ucs4[i] <= 0x3ffffff) {
+      *utf8++ = 248u + (ucs4[i] / 16777216);
+      *utf8++ = 128u + ((ucs4[i] / 262144) % 64);
+      *utf8++ = 128u + ((ucs4[i] / 4096) % 64);
+      *utf8++ = 128u + ((ucs4[i] / 64) % 64);
+      *utf8++ = 128u + (ucs4[i] % 64);
+    }
+    else if (ucs4[i] <= 0x7fffffff) {
+      *utf8++ = 252u + (ucs4[i] / 1073741824);
+      *utf8++ = 128u + ((ucs4[i] / 16777216) % 64);
+      *utf8++ = 128u + ((ucs4[i] / 262144) % 64);
+      *utf8++ = 128u + ((ucs4[i] / 4096) % 64);
+      *utf8++ = 128u + ((ucs4[i] / 64) % 64);
+      *utf8++ = 128u + (ucs4[i] % 64);
+    }
+  }
   bitc_string_t *tmp = (bitc_string_t *) 
     GC_ALLOC_ATOMIC(sizeof(bitc_string_t));
   tmp->length = len;
   tmp->s = s;
   return tmp;
 }
-#endif
+DEFCLOSURE(bitc_vector_string);
 
 #if 0
 bitc_unit_t



More information about the bitc-dev mailing list