/* ----------------------- Own_lib library series: mbsfgets.cc -----------------------
File reading (avoid reading half a character):
cy_mbsfgets () / mbsfgets.o / libcyfunc.a
Description: When processing multi-byte text files (such as Chinese), the standard
fgets () function counts bytes only, so it can easily read half a character and
leave the string incomplete. Extra processing code then has to be added to every
program, which makes the code less concise.
This function encapsulates that processing so that reading East Asian text is more
straightforward and the calling code stays concise. The function uses the standard
library functions "mbstowcs () / wcstombs ()", so you should call setlocale () at the
beginning of your program.
As with the standard "fgets ()", a newline at the end of a line in the file is read
like any other character, and a '\0' (null character) terminator is appended to the
end of the string.
When calling the function, pass: the address (pointer) of a destination string with
enough memory (MB_CUR_MAX * max_chars + 1 bytes) | the maximum number of characters
(not bytes) | the file stream pointer | and an "LC_CTYPE" locale string (for example
"zh_CN.GBK"; the default value NULL means the system setting is not changed. If an
invalid locale name is passed, the function does not change the current setting, but
it still behaves as if a locale had been supplied!).
The second function is an overload that adds one reference variable returning the
actual number of bytes in the string read and one reference variable returning the
actual number of characters in the string read.
dest_str:
    Pointer to the memory that receives the string read, or NULL.
max_chars:
    The maximum number of characters to read from the file stream.
stream:
    A file stream pointer (FILE structure).
[length]:
    Returns the number of bytes in the string actually read.
[chars]:
    Returns the number of characters in the string actually read.
locale_ctype = NULL:
    The LC_CTYPE locale (language / encoding) the text belongs to. Most programs set
    this once at start-up, so it can usually be omitted.
return value:
    Returns a pointer to the string in the dest_str memory ('\0' already appended).
    Returns NULL on error.
Note!
1. The function reads from stream until a newline appears, the end of the file is
   reached, or max_chars characters have been read.
2. When an LC_CTYPE locale is passed, dest_str should be NULL; the function then
   returns a pointer to a string in the free store (after use, delete [] it and set
   the pointer to NULL). If a locale is passed and dest_str is not NULL, the function
   only advances the file stream pointer by one line or by the specified number of
   characters (when that is less than one line) and returns NULL.
   When no LC_CTYPE locale is passed and dest_str is NULL, the behaviour is the same
   as when both LC_CTYPE and dest_str are given (the empty string "" may be passed):
   only the file stream pointer moves and NULL is returned.
3. After a locale string is passed, the function temporarily changes the "LC_CTYPE"
   setting and restores the original setting on exit.
Author: Ren Xiao | 2002.05.08.
Copyright: GNU General (Library) Public License (GPL / LGPL)
Editor: vim-6.0 | Operating System: TurboLinux7.0 Simplified Chinese Edition
------------------------------------------------------------------------------------ */
#include <stdio.h> // usr for fgets ()
#include <stdlib.h> // usr for MB_CUR_MAX --The maximum byte length per word in the current multibyte environment.
// usr for mbs * and others.
#include <locale.h> // usr for setlocale () .-- The function provides the opportunity to change the environment "LC_CTYPE"
#include <limits.h> // usr for INT_MAX (buffer size overflow check)
#include <string.h> // usr for strlen () / memcpy () when saving the locale name
#include <new> // usr for std::nothrow
#include "cyinclude / cyfget.h"
char * cy_mbsfgets (char * dest_str, int max_chars, FILE * stream,
const char * locale_ctype = NULL)
{
long fpos_before;
if ((fpos_before = ftell (stream)) == -1) // Get the file stream pointer position before reading
return NULL;
int num = max_chars * MB_CUR_MAX;
bool locale_check = false;
char * locale_original = setlocale (LC_CTYPE, NULL); // LC_CTYPE original value
if (locale_ctype)
{
if (! dest_str)
{
setlocale (LC_CTYPE, locale_ctype);
num = max_chars * MB_CUR_MAX;
locale_check = true;
}
else
dest_str = NULL;
}
size_t test_length = 0; // used to test whether the number of input characters exceeds the requirement
char * tmp_chars = new char [num + 1]; // the maximum possible space for the string, for temporary storage
wchar_t * tmp_wchars = new wchar_t [max_chars + 1];
if ((! tmp_chars) || (! tmp_wchars))
{// One of them may have been successfully applied!
if (locale_check)
setlocale (LC_CTYPE, locale_original);
delete [] tmp_chars;
tmp_chars = NULL;
delete [] tmp_wchars;
tmp_wchars = NULL;
return NULL;
}
if (! fgets (tmp_chars, num + 1, stream)) // receive the maximum number of input strings
{
if (locale_check)
setlocale (LC_CTYPE, locale_original);
delete [] tmp_chars;
tmp_chars = NULL;
delete [] tmp_wchars;
tmp_wchars = NULL;
return NULL;
}
test_length = mbstowcs (tmp_wchars, tmp_chars, max_chars + 1);
if ((test_length == (size_t) -1) || (test_length == max_chars + 1))
tmp_wchars [max_chars] = L'\0 '; // Extra characters or illegal bytes are truncated or overwritten
size_t chars_length; // Actual read string length (used to set file stream pointer offset)
char * return_chars = NULL;
if (locale_check)
{
chars_length = wcstombs (NULL, tmp_wchars, num + 1);
return_chars = new char [chars_length + 1];
if (! return_chars)
{
setlocale (LC_CTYPE, locale_original);
delete [] tmp_chars;
tmp_chars = NULL;
delete [] tmp_wchars;
tmp_wchars = NULL;
return NULL;
}
wcstombs (return_chars, tmp_wchars, chars_length + 1);
}
else
chars_length = wcstombs (dest_str, tmp_wchars, num + 1);
Ranch
if (locale_check)
setlocale (LC_CTYPE, locale_original);
delete [] tmp_chars;
tmp_chars = NULL;
delete [] tmp_wchars;
tmp_wchars = NULL;
Ranch
if (fseek (stream, fpos_before + chars_length, SEEK_SET))
return NULL; // reset the file stream pointer, return NULL when there is an error
if (locale_check)
return return_chars;
else
return dest_str;
}
// ------------------------------------------------ -----------------------------
// Overloaded function of the previous function.
char * cy_mbsfgets (char * dest_str, int max_chars, FILE * stream,
int&length, int&chars, const char * locale_ctype = NULL)
{
long fpos_before;
if ((fpos_before = ftell (stream)) == -1) // Get the file stream pointer position before reading
return NULL;
int num = max_chars * MB_CUR_MAX;
bool locale_check = false;
char * locale_original = setlocale (LC_CTYPE, NULL); // LC_CTYPE original value
if (locale_ctype)
{
if (! dest_str)
{
setlocale (LC_CTYPE, locale_ctype);
num = max_chars * MB_CUR_MAX;
locale_check = true;
}
else
dest_str = NULL;
}
size_t test_length = 0; // used to test whether the number of input characters exceeds the requirement
char * tmp_chars = new char [num + 1]; // the maximum possible space for the string, for temporary storage
wchar_t * tmp_wchars = new wchar_t [max_chars + 1];
if ((! tmp_chars) || (! tmp_wchars))
{// One of them may have been successfully applied!
if (locale_check)
setlocale (LC_CTYPE, locale_original);
delete [] tmp_chars;
tmp_chars = NULL;
delete [] tmp_wchars;
tmp_wchars = NULL;
return NULL;
}
if (! fgets (tmp_chars, num + 1, stream)) // receive the maximum number of input strings
{
if (locale_check)
setlocale (LC_CTYPE, locale_original);
delete [] tmp_chars;
tmp_chars = NULL;
delete [] tmp_wchars;
tmp_wchars = NULL;
return NULL;
}
test_length = mbstowcs (tmp_wchars, tmp_chars, max_chars + 1);
if ((test_length == (size_t) -1) || (test_length == max_chars + 1))
{
tmp_wchars [max_chars] = L'\0 '; // Extra characters or illegal bytes are truncated or overwritten
chars = max_chars; // !. Passing by reference returns the actual number of string characters
}
else
chars = test_length; // !. Passing by reference returns the actual number of string characters
size_t chars_length; // Actual read string length, used to set file stream pointer offset and return value
char * return_chars = NULL;
if (locale_check)
{
chars_length = wcstombs (NULL, tmp_wchars, num + 1);
return_chars = new char [chars_length + 1];
if (! return_chars)
{
setlocale (LC_CTYPE, locale_original);
delete [] tmp_chars;
tmp_chars = NULL;
delete [] tmp_wchars;
tmp_wchars = NULL;
return NULL;
}
wcstombs (return_chars, tmp_wchars, chars_length + 1);
length = chars_length; // !. Passing by reference returns the actual string bytes
}
else
{
chars_length = wcstombs (dest_str, tmp_wchars, num + 1);
length = chars_length; // !. Passing by reference returns the actual string bytes
}
Ranch
if (locale_check)
setlocale (LC_CTYPE, locale_original);
delete [] tmp_chars;
tmp_chars = NULL;
delete [] tmp_wchars;
tmp_wchars = NULL;
Ranch
if (fseek (stream, fpos_before + chars_length, SEEK_SET))
return NULL; // reset the file stream pointer, return -1 when there is an error
Ranch
if (locale_check)
return return_chars;
else
return dest_str;
} |
|