Skip to content

Commit

Permalink
Use system code page for ANSI conversion
Browse files Browse the repository at this point in the history
Due to problems with the `_autodetect_all` encoding parameter when processing source code files, we are now using a simple `GetACP` API call to retrieve the active code page from Windows, and mapping this to the appropriate encoding. (With support to easily add other encoding mappings in the future.) This should solve the issue described in #186 where the automatic detection was guessing the language incorrectly.
  • Loading branch information
joyfullservice committed Mar 8, 2021
1 parent e960642 commit d84d8b7
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Version Control.accda.src/dbs-properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"Type": 10
},
"AppVersion": {
"Value": "3.3.7",
"Value": "3.3.8",
"Type": 10
},
"Auto Compact": {
Expand Down
2 changes: 1 addition & 1 deletion Version Control.accda.src/modules/clsPerformance.bas
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ Public Function GetReports() As String
Dim strSpacer As String

' Set up column sizes
lngCol(0) = 25
lngCol(0) = 30
lngCol(1) = 10
lngCol(2) = 10
strSpacer = Space(lngCol(0) + lngCol(1) + lngCol(2))
Expand Down
55 changes: 49 additions & 6 deletions Version Control.accda.src/modules/modEncoding.bas
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ Option Private Module
Option Explicit


' API call to determine active code page (default system encoding)
Private Declare PtrSafe Function GetACP Lib "kernel32" () As Long


' Cache the Ucs2 requirement for this database
Private m_blnUcs2 As Boolean
Private m_strDbPath As String
Expand Down Expand Up @@ -200,7 +204,7 @@ Public Sub ConvertAnsiUtf8(strSourceFile As String, strDestinationFile As String
Optional blnDeleteSourceFileAfterConversion As Boolean = True)

' Perform file conversion
ReEncodeFile strSourceFile, "_autodetect_all", strDestinationFile, "UTF-8", adSaveCreateOverWrite
ReEncodeFile strSourceFile, GetSystemEncoding, strDestinationFile, "utf-8", adSaveCreateOverWrite

' Remove original file if specified.
If blnDeleteSourceFileAfterConversion Then DeleteFile strSourceFile
Expand All @@ -219,7 +223,7 @@ Public Sub ConvertUtf8Ansi(strSourceFile As String, strDestinationFile As String
Optional blnDeleteSourceFileAfterConversion As Boolean = True)

' Perform file conversion
ReEncodeFile strSourceFile, "UTF-8", strDestinationFile, "_autodetect_all", adSaveCreateOverWrite
ReEncodeFile strSourceFile, "utf-8", strDestinationFile, GetSystemEncoding, adSaveCreateOverWrite

' Remove original file if specified.
If blnDeleteSourceFileAfterConversion Then DeleteFile strSourceFile
Expand Down Expand Up @@ -297,9 +301,7 @@ Public Sub ReEncodeFile(strInputFile As String, strInputCharset As String, _
Dim objOutputStream As ADODB.Stream

' Open streams and copy data
Perf.OperationStart "Enc " & _
Replace(strInputCharset, "_autodetect_all", "AUTO") & " as " & _
Replace(strOutputCharset, "_autodetect_all", "AUTO")
Perf.OperationStart "Enc. " & strInputCharset & " as " & strOutputCharset
Set objOutputStream = New ADODB.Stream
With New ADODB.Stream
.Open
Expand All @@ -319,4 +321,45 @@ Public Sub ReEncodeFile(strInputFile As String, strInputCharset As String, _
objOutputStream.Close
Perf.OperationEnd

End Sub
End Sub


'---------------------------------------------------------------------------------------
' Procedure : GetSystemEncoding
' Author : Adam Waller
' Date : 3/8/2021
' Purpose : Return the current encoding type used for non-UTF-8 text files.
' : (Such as VBA code modules.)
' : https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
' : https://documentation.help/MS-Office-VB/ofhowConstants.htm
' : * Note that using utf-8 as a default system encoding may not work
' : correctly with some extended characters in VBA code modules. The VBA IDE
' : does not support Unicode characters, and requires code pages to display
' : extended/non-English characters. See Issues #60, #186, #180
'---------------------------------------------------------------------------------------
'
Public Function GetSystemEncoding() As String

    ' Active code page, retained between calls so the API is only queried
    ' while the cached value is still zero.
    Static lngCodePage As Long
    Dim strCharset As String

    If lngCodePage = 0 Then lngCodePage = GetACP

    ' Translate the code page identifier into a charset name.
    ' Additional language encoding mappings can be added to this chain.
    If lngCodePage = msoEncodingISO88591Latin1 Then
        strCharset = "iso-8859-1"

    ElseIf lngCodePage = msoEncodingWestern Then
        strCharset = "windows-1252"

    ElseIf lngCodePage = msoEncodingUTF8 Then
        ' *In Windows 10, this is a checkbox in Region settings for
        '  "Beta: Use Unicode UTF-8 for worldwide language support"
        strCharset = "utf-8"

    Else
        ' No explicit mapping for this code page, so fall back to
        ' detecting the language from the content. (Note that detection
        ' is less reliable on code than on normal written language.
        ' See issue #186)
        strCharset = "_autodetect_all"
    End If

    GetSystemEncoding = strCharset

End Function
2 changes: 1 addition & 1 deletion Version Control.accda.src/modules/modVCSUtility.bas
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ Public Sub SaveComponentAsText(intType As AcObjectType, _
Case acModule '(ANSI text file)
' Modules may contain extended characters that need UTF-8 conversion
' to display correctly in some editors.
If StringHasExtendedASCII(ReadFile(strTempFile, "_autodetect_all")) Then
If StringHasExtendedASCII(ReadFile(strTempFile, GetSystemEncoding)) Then
' Convert to UTF-8
ConvertAnsiUtf8 strTempFile, strFile
Else
Expand Down

0 comments on commit d84d8b7

Please sign in to comment.