2018-12-11 12:23:17 +13:00
#!/usr/bin/env python3
2017-01-13 10:22:16 +13:00
#
# Unix SMB/CIFS implementation.
#
# WERROR error definition generation
#
# Copyright (C) Catalyst.Net Ltd. 2017
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
s4/scripting/bin: open unicode files with utf8 encoding and write unicode string
In files like `libcli/util/werror_err_table.txt` and `libcli/util/ntstatus_err_table.txt`,
there were unicode quote symbols at line 6:
...(“this documentation”)...
In `libcli/util/wscript_build`, it will run `gen_werror.py` and `gen_ntstatus.py`
to `open` above files, read content from them and write to other files.
When encoding not specified, `open` in both python 2/3 will guess encoding from locale.
When locale is not set, it defaults to POSIX or C, and then python will use
encoding `ANSI_X3.4-1968`.
So, on a system where the locale is not set, `make` will fail with an encoding error
for both python 2 and 3:
File "/home/ubuntu/samba/source4/scripting/bin/gen_werror.py", line 139, in main
errors = parseErrorDescriptions(input_file, True, transformErrorName)
File "/home/ubuntu/samba/source4/scripting/bin/gen_error_common.py", line 52, in parseErrorDescriptions
for line in file_contents:
File "/usr/lib/python3.5/encodings/ascii.py", line 26, in decode
return codecs.ascii_decode(input, self.errors)[0]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 318: ordinal not in range(128)
In this case, we have to use `io.open` with `encoding='utf8'`.
However, then we got unicode strs and try to write them with other strs
into new file, which means the new file must also open with utf-8 and
all other strs have to be unicode, too.
Instead of prefixing every str with `u`, an easier and more elegant way is to enable
unicode literals for the python scripts, which we normally didn't do in samba.
Since both `gen_werror.py` and `gen_ntstatus.py` are bin scripts and no
other modules import them, it should be ok for this case.
Signed-off-by: Joe Guo <joeg@catalyst.net.nz>
Autobuild-User(master): Douglas Bagnall <dbagnall@samba.org>
Autobuild-Date(master): Fri Feb 8 06:34:47 CET 2019 on sn-devel-144
2019-01-30 15:52:08 +13:00
from __future__ import unicode_literals
# This file is a bin script that is not imported by any other module,
# so it is safe to enable unicode string literals for Python 2.
2017-01-13 10:22:16 +13:00
import sys , os . path , io , string
from gen_error_common import parseErrorDescriptions , ErrorDef
def generateHeaderFile(out_file, errors):
    """Write the C header that defines one W_ERROR macro per error.

    The output is wrapped in a ``_WERR_GEN_H`` include guard and contains
    one ``#define <name> W_ERROR(<hex code>)`` line per entry, in the
    order given.

    :param out_file: writable text stream; only ``write``/``writelines``
        are used.
    :param errors: iterable of objects exposing ``err_define`` (the macro
        name) and ``err_code`` (an integer passed to ``hex()``).
        Presumably the ErrorDef instances returned by
        parseErrorDescriptions() -- confirm against gen_error_common.
    """
    out_file.write(
        "/*\n"
        " * Descriptions for errors generated from\n"
        " * [MS-ERREF] https://msdn.microsoft.com/en-us/library/cc231199.aspx\n"
        " */\n"
        "\n"
        "#ifndef _WERR_GEN_H\n"
        "#define _WERR_GEN_H\n"
    )
    out_file.writelines(
        "#define %s W_ERROR(%s)\n" % (err.err_define, hex(err.err_code))
        for err in errors
    )
    out_file.write("\n#endif /* _WERR_GEN_H */\n")
def generateSourceFile(out_file, errors):
    """Write the C source holding the WERROR name and description tables.

    Two arrays are emitted:

    * ``dos_errs``: one ``{ "<name>", <name> }`` row per error, closed by
      a ``{ 0, W_ERROR(0) }`` sentinel.
    * ``dos_err_strs``: one ``{ <name>, "<description>" }`` row per error
      that has a description, closed by a ``{ W_ERROR(0), 0 }`` sentinel.

    :param out_file: writable text stream; only ``write``/``writelines``
        are used.
    :param errors: re-iterable collection (it is walked twice) of objects
        exposing ``err_define`` and ``err_string``.
    """
    # Shared comment banner; the only "%" in it is the leading-word slot.
    banner = (
        "/*\n"
        " * %s for errors generated from\n"
        " * [MS-ERREF] https://msdn.microsoft.com/en-us/library/cc231199.aspx\n"
        " */\n"
    )

    out_file.write("#include \"werror.h\"\n")

    # Name table: the C string must be exactly the macro name, with no
    # padding inside the quotes.
    out_file.write(banner % "Names")
    out_file.write("static const struct werror_code_struct dos_errs[] =\n{\n")
    out_file.writelines(
        "\t{ \"%s\", %s },\n" % (err.err_define, err.err_define)
        for err in errors
    )
    out_file.write("{ 0, W_ERROR(0) }\n};\n")

    out_file.write("\n")
    out_file.write(banner % "Descriptions")
    out_file.write("static const struct werror_str_struct dos_err_strs[] =\n{\n")
    for err in errors:
        # Account for the possibility that some errors may not have
        # descriptions: skip missing, empty and whitespace-only ones.
        if not err.err_string or not err.err_string.strip():
            continue
        # NOTE(review): err_string is pasted into a C string literal
        # unescaped; this assumes the parser already escaped quotes and
        # backslashes -- confirm in gen_error_common.
        out_file.write("\t{ %s, \"%s\" },\n" % (err.err_define, err.err_string))
    # End the file with a newline so the generated C source is well formed.
    out_file.write("\t{ W_ERROR(0), 0 }\n};\n")
def generatePythonFile(out_file, errors):
    """Write the C source of the ``werror`` Python extension module.

    The generated module creates itself with ``PyModule_Create`` and adds
    one integer attribute per error, named after ``err_define`` and set to
    ``W_ERROR_V(<err_define>)``.

    :param out_file: writable text stream; only ``write``/``writelines``
        are used.
    :param errors: iterable of objects exposing ``err_define``.
    """
    out_file.write(
        "/*\n"
        " * Errors generated from\n"
        " * [MS-ERREF] https://msdn.microsoft.com/en-us/library/cc231199.aspx\n"
        " */\n"
        "#include <Python.h>\n"
        "#include \"python/py3compat.h\"\n"
        "#include \"includes.h\"\n"
        "\n"
    )

    out_file.write(
        "static struct PyModuleDef moduledef = {\n"
        "\tPyModuleDef_HEAD_INIT,\n"
        "\t.m_name = \"werror\",\n"
        "\t.m_doc = \"WERROR defines\",\n"
        "\t.m_size = -1,\n"
        "};\n"
        "\n"
    )

    # There is never a hand-written prototype for the module init
    # function: the module is loaded by python with dlopen() and the
    # function is found with dlsym().
    # NOTE(review): MODULE_INIT_FUNC (python/py3compat.h) presumably
    # supplies the declaration that avoids a missing-prototype error from
    # the C compiler -- confirm.
    out_file.write(
        "MODULE_INIT_FUNC(werror)\n"
        "{\n"
        "\tPyObject *m;\n"
        "\n"
        "\tm = PyModule_Create(&moduledef);\n"
        "\tif (m == NULL)\n"
        "\t\treturn NULL;\n"
        "\n"
    )

    # Adjacent literals keep this file's own indentation (and anything
    # else between the two lines) out of the generated C.
    add_constant = (
        "\tPyModule_AddObject(m, \"%s\",\n"
        "\t\tPyLong_FromUnsignedLongLong(W_ERROR_V(%s)));\n"
    )
    out_file.writelines(
        add_constant % (err.err_define, err.err_define) for err in errors
    )

    out_file.write(
        "\n"
        "\treturn m;\n"
        "}\n"
    )
def transformErrorName(error_name):
    """Return the canonical ``WERR_*`` macro name for *error_name*.

    At most one leading ``WERR_`` or ``ERROR_`` prefix is removed
    (``WERR_`` is tried first), the remainder is upper-cased, and
    ``WERR_`` is prepended. Prefix matching is case-sensitive.

    :param error_name: error name string to normalise.
    :returns: a string of the form ``WERR_<UPPERCASED NAME>``.
    """
    for prefix in ("WERR_", "ERROR_"):
        if error_name.startswith(prefix):
            error_name = error_name[len(prefix):]
            # Only ever strip a single prefix.
            break
    return "WERR_" + error_name.upper()
# Script to generate files werror_gen.h, doserr_gen.c and
# py_werror.c.
#
# These files contain generated definitions for WERRs and
# their descriptions/names.
#
# This script takes four inputs:
# [1]: The name of the text file which is the content of an HTML table
# (e.g. the one found at https://msdn.microsoft.com/en-us/library/cc231199.aspx)
# copied and pasted.
# [2]: [[output werror_gen.h]]
# [3]: [[output doserr_gen.c]]
# [4]: [[output py_werror.c]]
def main():
    """Generate werror_gen.h, doserr_gen.c and py_werror.c.

    Expects exactly four command-line arguments:

    1. the input text file holding the error table,
    2. the output header file name,
    3. the output C source file name,
    4. the output Python-module C source file name.

    With any other argument count a usage message is written to stderr
    and the process exits with status 1, so a mis-invocation from the
    build fails visibly.
    """
    if len(sys.argv) != 5:
        sys.stderr.write(
            "usage: %s winerrorfile headerfile sourcefile pythonfile\n"
            % sys.argv[0]
        )
        sys.exit(1)

    (input_file_name,
     gen_headerfile_name,
     gen_sourcefile_name,
     gen_pythonfile_name) = sys.argv[1:5]

    # The error table contains non-ASCII characters, so never rely on the
    # locale's default encoding: read and write everything as UTF-8.
    with io.open(input_file_name, "rt", encoding='utf8') as input_file:
        errors = parseErrorDescriptions(input_file, True, transformErrorName)

    outputs = (
        ("header", gen_headerfile_name, generateHeaderFile),
        ("source", gen_sourcefile_name, generateSourceFile),
        ("python", gen_pythonfile_name, generatePythonFile),
    )
    for label, path, generate in outputs:
        print("writing new %s file: %s" % (label, path))
        # "with" closes the file even if the generator raises.
        with io.open(path, "wt", encoding='utf8') as out_file:
            generate(out_file, errors)
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()