Add support for 4-byte UTF-8 characters and stricter character checking
This commit is contained in:
		
							parent
							
								
									05edfa48b2
								
							
						
					
					
						commit
						8a7e892aeb
					
				| @ -63,7 +63,7 @@ char* strip_white_space(char* string) | ||||
| 	return string; | ||||
| } | ||||
| 
 | ||||
| static int is_valid_utf8_str(const char* string, size_t length) | ||||
| static int is_valid_utf8_str(const unsigned char* string, size_t length) | ||||
| { | ||||
| 	int expect = 0; | ||||
| 	char div = 0; | ||||
| @ -82,12 +82,32 @@ static int is_valid_utf8_str(const char* string, size_t length) | ||||
| 		{ | ||||
| 			if (string[pos] & 0x80) | ||||
| 			{ | ||||
| 				for (div = 0x40; div > 0x10; div /= 2) | ||||
| 				for (div = 0x40; div > 0x08; div /= 2) | ||||
| 				{ | ||||
| 					if (string[pos] & div) expect++; | ||||
| 					else break; | ||||
| 				} | ||||
| 				if ((string[pos] & div) || (pos+expect >= length)) return 0; | ||||
| 				switch (expect) { | ||||
| 					case 0: | ||||
| 						return 0; | ||||
| 					case 1: | ||||
| 						/* Out of range */ | ||||
| 						if (string[pos] < 0xC2) return 0; | ||||
| 						break; | ||||
| 					case 2: | ||||
| 						/* Out of range */ | ||||
| 						if ((string[pos] == 0xE0) && (string[pos+1] < 0xA0 )) return 0; | ||||
| 						/* Surrogates */ | ||||
| 						if ((string[pos] == 0xED) && (string[pos+1] > 0x9F )) return 0; | ||||
| 						break; | ||||
| 					case 3: | ||||
| 						/* Out of range */ | ||||
| 						if ((string[pos] == 0xF0) && (string[pos+1] < 0x90 )) return 0; | ||||
| 						if (string[pos] > 0xF4) return 0; | ||||
| 						if ((string[pos] == 0xF4) && (string[pos+1] > 0x8F )) return 0; | ||||
| 						break; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user