Skip to content

Commit

Permalink
add the sheet names in which the string occurred to developer comments
Browse files Browse the repository at this point in the history
  • Loading branch information
manosprom committed Nov 22, 2023
1 parent c453c3e commit b44ac80
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 18 deletions.
48 changes: 39 additions & 9 deletions openformats/formats/xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from zipfile import ZipFile, ZIP_DEFLATED

import six
from bs4 import BeautifulSoup

from openformats.formats.office_open_xml.parser import OfficeOpenXmlHandler
Expand Down Expand Up @@ -210,7 +209,11 @@ def _extract_sheet_names(xlsx, order):
sheets = wordbook_soup.find_all("sheet")
sheet_names = []
for sheet in sheets:
open_string = OpenString(sheet["name"], sheet["name"])
open_string = OpenString(
sheet["name"],
sheet["name"],
developer_comment=sheet["name"]
)
open_string.order = next(order)
sheet.attrs['txid'] = open_string.string_hash
sheet_names.append(open_string)
Expand Down Expand Up @@ -254,7 +257,11 @@ def parse(self, content, **kwargs):
shared_strings = shared_strings_soup.find_all("si")

extracted_strings = {}
for sheet, sheet_details in six.iteritems(xlsx.get_sheets()):
sheet_index = -1
for sheet, sheet_details in xlsx.get_sheets().items():
sheet_index += 1
sheet_name = sheet_names[sheet_index].string

sheet_soup = BeautifulSoup(xlsx.get_sheet_content(sheet), "xml")
sheet_rels_soup = None
if xlsx.has_rels(sheet):
Expand Down Expand Up @@ -339,15 +346,31 @@ def parse(self, content, **kwargs):
string = wrap(string, cell_hyper_link)

if string:
open_string = OpenString(string, string)
if open_string.string_hash not in extracted_strings:
open_string.order = next(order)
extracted_strings[open_string.string_hash] = open_string
find_string_from.attrs['txid'] = open_string.string_hash
open_string_tmp = OpenString(
key=string, string_or_strings=string
)

if open_string_tmp.string not in extracted_strings:
extracted_strings[string] = {
"sheets": [sheet_name],
}
find_string_from.attrs['txid'] = open_string_tmp.string_hash
elif sheet_name not in extracted_strings[string]["sheets"]:
extracted_strings[string]["sheets"].append(sheet_name)

xlsx.set_sheet_content(sheet, str(sheet_soup))
xlsx.set_shared_strings_content(str(shared_strings_soup))

all_strings = sheet_names + list(six.viewvalues(extracted_strings))
all_strings = list(sheet_names)

for string, string_details in extracted_strings.items():
developer_comment = ", ".join(string_details.get("sheets", []))
all_strings.append(OpenString(
string,
string,
developer_comment=developer_comment,
))

template = xlsx.compress()

xlsx.delete()
Expand Down Expand Up @@ -472,6 +495,13 @@ def compile(self, template, stringset, **kwargs):

translation_string = open_string.string
escaped_translation_string = self._escape_xml(translation_string)

"""
some examples of transifex translations would be
- part1
- <tx>part1</tx><tx>part2</tx>
- <tx href='app.transifex.com'><tx>part1</tx><tx>part2</tx></tx>
"""
translation_soup = BeautifulSoup(
u'<wrapper>{}</wrapper>'.format(escaped_translation_string), 'xml',
)
Expand Down
Binary file modified openformats/tests/formats/xlsx/files/example.xlsx
Binary file not shown.
78 changes: 69 additions & 9 deletions openformats/tests/formats/xlsx/test_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,51 +112,111 @@ def test_xlsx_handler_parse(self):
self.assertIsNotNone(template)
self.assertEqual(len(stringset), 12)
self.assert_open_string(
stringset[0], {"string_hash": mock.ANY, "string": "Sheet1"}
stringset[0],
{
"string_hash": mock.ANY,
"string": "Sheet1",
"order": 1,
"developer_comments": "Sheet 1",
},
)
self.assert_open_string(
stringset[1], {"string_hash": mock.ANY, "string": "Sheet2"}
stringset[1],
{
"string_hash": mock.ANY,
"string": "Sheet2",
"order": 2,
"developer_comment": "Sheet2",
},
)
self.assert_open_string(
stringset[2], {"string_hash": mock.ANY, "string": "Sheet3"}
stringset[2],
{
"string_hash": mock.ANY,
"string": "Sheet3",
"order": 3,
"developer_comment": "Sheet3",
},
)
self.assert_open_string(
stringset[3], {"string_hash": mock.ANY, "string": "Sheet4"}
stringset[3],
{
"string_hash": mock.ANY,
"string": "Sheet4",
"order": 4,
"developer_comment": "Sheet4",
},
)
self.assert_open_string(
stringset[4], {"string_hash": mock.ANY, "string": "Sheet5"}
stringset[4],
{
"string_hash": mock.ANY,
"string": "Sheet5",
"order": 5,
"developer_comment": "Sheet5",
},
)
self.assert_open_string(
stringset[5], {"string_hash": mock.ANY, "string": "Sheet6"}
stringset[5],
{
"string_hash": mock.ANY,
"string": "Sheet6",
"order": 6,
"developer_comment": "Sheet6",
},
)
self.assert_open_string(
stringset[6],
{
"string_hash": mock.ANY,
"string": "<tx> I am a file </tx><tx>“bold”</tx>",
"order": 7,
"developer_comment": "Sheet1",
},
)
self.assert_open_string(
stringset[7], {"string_hash": mock.ANY, "string": "I have two sheets"}
stringset[7],
{
"string_hash": mock.ANY,
"string": "I have two sheets",
"order": 8,
"developer_comment": "Sheet2, Sheet4",
},
)
self.assert_open_string(
stringset[8],
{
"string_hash": mock.ANY,
"string": "<tx href='http://app.transifex.com/'>and a cell with a link</tx>",
"order": 9,
"developer_comment": "Sheet3",
},
)
self.assert_open_string(
stringset[9], {"string_hash": mock.ANY, "string": "And an inline string"}
stringset[9],
{
"string_hash": mock.ANY,
"string": "And an inline string",
"order": 10,
"developer_comment": "Sheet4",
},
)
self.assert_open_string(
stringset[10], {"string_hash": mock.ANY, "string": "and a comment"}
stringset[10],
{
"string_hash": mock.ANY,
"string": "and a comment",
"order": 11,
"developer_comment": "Sheet5",
},
)
self.assert_open_string(
stringset[11],
{
"string_hash": mock.ANY,
"string": "<tx href='https://www.google.com'>FormulaLink</tx>",
"order": 12,
"developer_comment": "Sheet6",
},
)

Expand Down

0 comments on commit b44ac80

Please sign in to comment.