diff --git a/Telegram/SourceFiles/export/data/export_data_types.cpp b/Telegram/SourceFiles/export/data/export_data_types.cpp
index 4ea404b73859c5..9500010e2a65c5 100644
--- a/Telegram/SourceFiles/export/data/export_data_types.cpp
+++ b/Telegram/SourceFiles/export/data/export_data_types.cpp
@@ -61,6 +61,85 @@ Utf8String ParseString(const MTPstring &data) {
 	return data.v;
 }
 
+// Splits message text into plain-text runs and entity parts.
+//
+// Entity offsets and lengths are applied to the QString decoded from
+// |data|, so all bounds here are QString positions, not UTF-8 bytes.
+// An entity is skipped when it starts before the end of the previous
+// accepted part, has a non-positive length or runs past the text end.
+std::vector<TextPart> ParseText(
+		const MTPstring &data,
+		const QVector<MTPMessageEntity> &entities) {
+	using Type = TextPart::Type;
+	const auto text = QString::fromUtf8(data.v);
+	// Same units as mid(): the UTF-8 byte count of data.v exceeds the
+	// QString length as soon as the text contains non-ASCII characters.
+	const auto size = text.size();
+	const auto mid = [&](int offset, int length) {
+		return text.mid(offset, length).toUtf8();
+	};
+	auto result = std::vector<TextPart>();
+	auto offset = 0;
+	auto addTextPart = [&](int till) {
+		if (till > offset) {
+			auto part = TextPart();
+			part.text = mid(offset, till - offset);
+			result.push_back(std::move(part));
+			offset = till;
+		}
+	};
+	for (const auto &entity : entities) {
+		const auto start = entity.match([](const auto &data) {
+			return data.voffset.v;
+		});
+		const auto length = entity.match([](const auto &data) {
+			return data.vlength.v;
+		});
+
+		// Subtraction instead of start + length: no signed overflow.
+		if (start < offset || length <= 0 || length > size - start) {
+			continue;
+		}
+
+		addTextPart(start);
+
+		auto part = TextPart();
+		part.type = entity.match(
+			[](const MTPDmessageEntityUnknown&) { return Type::Unknown; },
+			[](const MTPDmessageEntityMention&) { return Type::Mention; },
+			[](const MTPDmessageEntityHashtag&) { return Type::Hashtag; },
+			[](const MTPDmessageEntityBotCommand&) {
+				return Type::BotCommand; },
+			[](const MTPDmessageEntityUrl&) { return Type::Url; },
+			[](const MTPDmessageEntityEmail&) { return Type::Email; },
+			[](const MTPDmessageEntityBold&) { return Type::Bold; },
+			[](const MTPDmessageEntityItalic&) { return Type::Italic; },
+			[](const MTPDmessageEntityCode&) { return Type::Code; },
+			[](const MTPDmessageEntityPre&) { return Type::Pre; },
+			[](const MTPDmessageEntityTextUrl&) { return Type::TextUrl; },
+			[](const MTPDmessageEntityMentionName&) {
+				return Type::MentionName; },
+			[](const MTPDinputMessageEntityMentionName&) {
+				return Type::MentionName; },
+			[](const MTPDmessageEntityPhone&) { return Type::Phone; },
+			[](const MTPDmessageEntityCashtag&) { return Type::Cashtag; });
+		part.text = mid(start, length);
+		part.additional = entity.match(
+		[](const MTPDmessageEntityPre &data) {
+			return ParseString(data.vlanguage);
+		}, [](const MTPDmessageEntityTextUrl &data) {
+			return ParseString(data.vurl);
+		}, [](const MTPDmessageEntityMentionName &data) {
+			return NumberToString(data.vuser_id.v);
+		}, [](const auto &) { return Utf8String(); });
+
+		result.push_back(std::move(part));
+		offset = start + length;
+	}
+	addTextPart(size);
+	return result;
+}
+
 Utf8String FillLeft(const Utf8String &data, int length, char filler) {
 	if (length <= data.size()) {
 		return data;
@@ -812,7 +891,11 @@ Message ParseMessage(
 				mediaFolder);
 			context.botId = 0;
 		}
-		result.text = ParseString(data.vmessage);
+		result.text = ParseText(
+			data.vmessage,
+			(data.has_entities()
+				? data.ventities.v
+				: QVector<MTPMessageEntity>{}));
 	}, [&](const MTPDmessageService &data) {
 		result.id = data.vid.v;
 		const auto peerId = ParsePeerId(data.vto_id);
diff --git a/Telegram/SourceFiles/export/data/export_data_types.h b/Telegram/SourceFiles/export/data/export_data_types.h
index 3f03c6e3a0a8a8..bb0095643a77e3 100644
--- a/Telegram/SourceFiles/export/data/export_data_types.h
+++ b/Telegram/SourceFiles/export/data/export_data_types.h
@@ -415,6 +415,31 @@ ServiceAction ParseServiceAction(
 	const MTPMessageAction &data,
 	const QString &mediaFolder);
 
+// One run of message text: plain text (Type::Text) or a single entity.
+struct TextPart {
+	enum class Type {
+		Text,
+		Unknown,
+		Mention,
+		Hashtag,
+		BotCommand,
+		Url,
+		Email,
+		Bold,
+		Italic,
+		Code,
+		Pre,
+		TextUrl,
+		MentionName,
+		Phone,
+		Cashtag,
+	};
+	Type type = Type::Text;
+	Utf8String text;
+	// Entity-specific extra value; meaning depends on |type|.
+	Utf8String additional;
+};
+
 struct Message {
 	int32 id = 0;
 	int32 chatId = 0;
@@ -425,7 +450,7 @@ struct Message {
 	Utf8String signature;
 	int32 viaBotId = 0;
 	int32 replyToMsgId = 0;
-	Utf8String text;
+	std::vector<TextPart> text;
 	Media media;
 	ServiceAction action;
 
diff --git a/Telegram/SourceFiles/export/output/export_output_abstract.h b/Telegram/SourceFiles/export/output/export_output_abstract.h
index 1122d718810c95..dcf581d83b7951 100644
--- a/Telegram/SourceFiles/export/output/export_output_abstract.h
+++ b/Telegram/SourceFiles/export/output/export_output_abstract.h
@@ -29,10 +29,10 @@ struct Result;
 class Stats;
 
 enum class Format {
+	Json,
 	Text,
 	Yaml,
 	Html,
-	Json,
 };
 
 class AbstractWriter {
diff --git a/Telegram/SourceFiles/export/output/export_output_json.cpp b/Telegram/SourceFiles/export/output/export_output_json.cpp
index d41d3095c81357..f3543e1b378081 100644
--- a/Telegram/SourceFiles/export/output/export_output_json.cpp
+++ b/Telegram/SourceFiles/export/output/export_output_json.cpp
@@ -135,6 +135,67 @@ QByteArray SerializeArray(
 	return result;
 }
 
+// Serializes message text: SerializeString("") for no parts, a single
+// serialized string for one plain-text part, otherwise an array whose
+// items are strings (plain text) or objects (entities).
+QByteArray SerializeText(
+		Context &context,
+		const std::vector<Data::TextPart> &data) {
+	using Type = Data::TextPart::Type;
+
+	if (data.empty()) {
+		return SerializeString("");
+	}
+	const auto text = ranges::view::all(
+		data
+	) | ranges::view::transform([&](const Data::TextPart &part) {
+		if (part.type == Type::Text) {
+			return SerializeString(part.text);
+		}
+		const auto typeString = [&] {
+			switch (part.type) {
+			case Type::Unknown: return "unknown";
+			case Type::Mention: return "mention";
+			case Type::Hashtag: return "hashtag";
+			case Type::BotCommand: return "bot_command";
+			case Type::Url: return "link";
+			case Type::Email: return "email";
+			case Type::Bold: return "bold";
+			case Type::Italic: return "italic";
+			case Type::Code: return "code";
+			case Type::Pre: return "pre";
+			case Type::TextUrl: return "text_link";
+			case Type::MentionName: return "mention_name";
+			case Type::Phone: return "phone";
+			case Type::Cashtag: return "cashtag";
+			}
+			Unexpected("Type in SerializeText.");
+		}();
+		const auto additionalName = (part.type == Type::MentionName)
+			? "user_id"
+			: (part.type == Type::Pre)
+			? "language"
+			: (part.type == Type::TextUrl)
+			? "href"
+			: "none";
+		const auto additionalValue = (part.type == Type::MentionName)
+			? part.additional
+			: (part.type == Type::Pre || part.type == Type::TextUrl)
+			? SerializeString(part.additional)
+			: QByteArray();
+		return SerializeObject(context, {
+			{ "type", SerializeString(typeString) },
+			{ "text", SerializeString(part.text) },
+			{ additionalName, additionalValue },
+		});
+	}) | ranges::to_vector;
+
+	if (data.size() == 1 && data[0].type == Data::TextPart::Type::Text) {
+		return text[0];
+	}
+	return SerializeArray(context, text);
+}
+
 Data::Utf8String FormatUsername(const Data::Utf8String &username) {
 	return username.isEmpty() ? username : ('@' + username);
 }
@@ -493,7 +554,7 @@ QByteArray SerializeMessage(
 		Unexpected("Unsupported message.");
 	}, [](const base::none_type &) {});
 
-	push("text", message.text);
+	pushBare("text", SerializeText(context, message.text));
 
 	return serialized();
 }
diff --git a/Telegram/SourceFiles/export/output/export_output_text.cpp b/Telegram/SourceFiles/export/output/export_output_text.cpp
index cbb03794b32660..cf9002f00b08a3 100644
--- a/Telegram/SourceFiles/export/output/export_output_text.cpp
+++ b/Telegram/SourceFiles/export/output/export_output_text.cpp
@@ -425,7 +425,12 @@ QByteArray SerializeMessage(
 		Unexpected("Unsupported message.");
 	}, [](const base::none_type &) {});
 
-	push("Text", message.text);
+	auto value = JoinList(QByteArray(), ranges::view::all(
+		message.text
+	) | ranges::view::transform([](const Data::TextPart &part) {
+		return part.text;
+	}) | ranges::to_vector);
+	push("Text", value);
 
 	return SerializeKeyValue(std::move(values));
 }