From af1405ed41ca25bcc2967bcf97a0448d5cba208d Mon Sep 17 00:00:00 2001 From: tpowell-progress <104777878+tpowell-progress@users.noreply.github.com> Date: Wed, 25 Jan 2023 06:13:19 -0800 Subject: Force encoding with timezones that include umlauts if zone encoding is IBM437 (#1781) * If time.zone is encoded in Encoding::IBM437, force to WINDOWS_1252 * Added description of what's going on with the force/encode chain * Handle umlauts in popen output from Mixlib shellout Signed-off-by: Thomas Powell --- lib/ohai/plugins/timezone.rb | 20 ++++++++++++++++++++ lib/ohai/plugins/vmware.rb | 15 +++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/lib/ohai/plugins/timezone.rb b/lib/ohai/plugins/timezone.rb index 1d542e43..d2ab47dc 100644 --- a/lib/ohai/plugins/timezone.rb +++ b/lib/ohai/plugins/timezone.rb @@ -21,5 +21,25 @@ Ohai.plugin(:Timezone) do collect_data(:default) do time Mash.new unless time time[:timezone] = Time.now.getlocal.zone + + # Windows in German display language outputs LATIN1 bytes for .zone, but marks them as + # IBM437, which somehow fails any attempt at conversion to other encodings when + # ä is present, as in the timezone name "Mitteleuropäische Zeit" (Central European Time) + # + # Windows-1252 is the legacy encoding for Windows for German that actually + # translates (ISO-8859-1 works as well), but going with the more correct + # encoding name for Windows' implementation of Latin-1 + # + # References + # * [Code Page 437/IBM437](https://en.wikipedia.org/wiki/Code_page_437) + # * [ISO/IEC 8859-1](https://en.wikipedia.org/wiki/ISO/IEC_8859-1) + # * [Windows-1252](https://en.wikipedia.org/wiki/Windows-1252) + if time[:timezone].encoding == Encoding::IBM437 + # Assume encoding is WINDOWS_1252 + time[:timezone] = time[:timezone].force_encoding(Encoding::WINDOWS_1252) + # Re-encode in UTF_8. Note: If other encodings have problems converting, + # it might be worth re-encoding everything in UTF_8. 
+ time[:timezone] = time[:timezone].encode(Encoding::UTF_8) + end end end diff --git a/lib/ohai/plugins/vmware.rb b/lib/ohai/plugins/vmware.rb index 250a9e24..438efb41 100644 --- a/lib/ohai/plugins/vmware.rb +++ b/lib/ohai/plugins/vmware.rb @@ -53,6 +53,21 @@ Ohai.plugin(:VMware) do # to attribute "vmware[:<parameter>]" %w{hosttime speed sessionid balloon swap memlimit memres cpures cpulimit}.each do |param| vmware[param] = from_cmd([vmtools_path, "stat", param]) + if param == "hosttime" && vmtools_path.include?("Program Files") + # popen and %x return stdout encoded as IBM437 in Windows but in a string marked + # UTF-8. The string doesn't throw an exception when encoded to "UTF-8" but + # displays a [?] character in Windows without this. + # + # .force_encoding(Encoding::ISO_8859_1) causes the character to be dropped + # and .force_encoding(Encoding::Windows_1252) displays the „ character in place + # of an ä. .force_encoding(Encoding::IBM437) allows for the correct characters + # to be displayed. + # + # Note: + # * this is broken for at least Ruby 2.7 through 3.1.3 + # * confirmed that this is broken on Windows Server 2022 + vmware[param] = vmware[param].force_encoding(Encoding::IBM437).encode("UTF-8") + end if /UpdateInfo failed/.match?(vmware[param]) vmware[param] = nil end -- cgit v1.2.1