Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support setting language on posts #579

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions activities/migrations/0017_post_language.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.1 on 2023-05-15 09:26

from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema migration that adds a ``language`` column to the ``post`` model.

    # Must be applied after the previous migration of the "activities" app.
    dependencies = [
        ("activities", "0016_index_together_migration"),
    ]

    operations = [
        # New character field named "language"; its default is the empty
        # string. No max_length is declared on the field.
        migrations.AddField(
            model_name="post",
            name="language",
            field=models.CharField(default=""),
        ),
    ]
23 changes: 22 additions & 1 deletion activities/models/post.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from core.ld import (
canonicalise,
format_ld_date,
get_language,
get_list,
get_value_or_map,
parse_ld_date,
Expand Down Expand Up @@ -252,6 +253,9 @@ class Types(models.TextChoices):
# The main (HTML) content
content = models.TextField()

# The language of the content
language = models.CharField(default="")

type = models.CharField(
max_length=20,
choices=Types.choices,
Expand Down Expand Up @@ -474,6 +478,7 @@ def create_local(
reply_to: Optional["Post"] = None,
attachments: list | None = None,
question: dict | None = None,
language: str | None = None,
) -> "Post":
with transaction.atomic():
# Find mentions in this post
Expand All @@ -492,6 +497,9 @@ def create_local(
sorted([tag[: Hashtag.MAXIMUM_LENGTH] for tag in parser.hashtags])
or None
)
if language is None or language == "":
language = author.config_identity.preferred_posting_language

# Make the Post object
post = cls.objects.create(
author=author,
Expand All @@ -502,6 +510,7 @@ def create_local(
visibility=visibility,
hashtags=hashtags,
in_reply_to=reply_to.object_uri if reply_to else None,
language=language,
)
post.object_uri = post.urls.object_uri
post.url = post.absolute_object_uri()
Expand All @@ -526,6 +535,7 @@ def edit_local(
visibility: int = Visibilities.public,
attachments: list | None = None,
attachment_attributes: list | None = None,
language: str | None = None,
):
with transaction.atomic():
# Strip all HTML and apply linebreaks filter
Expand All @@ -538,6 +548,9 @@ def edit_local(
self.summary = summary or None
self.sensitive = bool(summary) if sensitive is None else sensitive
self.visibility = visibility
if language is None or language == "":
language = self.author.config_identity.preferred_posting_language
self.language = language
self.edited = timezone.now()
self.mentions.set(self.mentions_from_content(content, self.author))
self.emojis.set(Emoji.emojis_from_content(content, None))
Expand Down Expand Up @@ -649,6 +662,10 @@ def to_ap(self) -> dict:
"tag": [],
"attachment": [],
}
if self.language != "":
value["contentMap"] = {
self.language: value["content"],
}
if self.type == Post.Types.question and self.type_data:
value[self.type_data.mode] = [
{
Expand Down Expand Up @@ -872,6 +889,7 @@ def by_ap(cls, data, create=False, update=False, fetch_author=False) -> "Post":
post.published = parse_ld_date(data.get("published"))
post.edited = parse_ld_date(data.get("updated"))
post.in_reply_to = data.get("inReplyTo")
post.language = get_language(data) or ""
# Mentions and hashtags
post.hashtags = []
for tag in get_list(data, "tag"):
Expand Down Expand Up @@ -1106,12 +1124,16 @@ def to_mastodon_json(self, interactions=None, bookmarks=None, identity=None):
self.Visibilities.mentioned: "direct",
self.Visibilities.local_only: "public",
}
language = self.language
if self.language == "":
language = None
value = {
"id": self.pk,
"uri": self.object_uri,
"created_at": format_ld_date(self.published),
"account": self.author.to_mastodon_json(include_counts=False),
"content": self.safe_content_remote(),
"language": language,
"visibility": visibility_mapping[self.visibility],
"sensitive": self.sensitive,
"spoiler_text": self.summary or "",
Expand Down Expand Up @@ -1152,7 +1174,6 @@ def to_mastodon_json(self, interactions=None, bookmarks=None, identity=None):
if isinstance(self.type_data, QuestionData)
else None,
"card": None,
"language": None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be updated to forward the stored language?

"text": self.safe_content_remote(),
"edited_at": format_ld_date(self.edited) if self.edited else None,
}
Expand Down
10 changes: 8 additions & 2 deletions api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ class Status(Schema):
reblog: Optional["Status"] = Field(...)
poll: Poll | None = Field(...)
card: None = Field(...)
language: None = Field(...)
language: str | None = Field(...)
text: str | None = Field(...)
edited_at: str | None
favourited: bool = False
Expand Down Expand Up @@ -422,13 +422,19 @@ def from_identity(
activities_models.Post.Visibilities.mentioned: "direct",
activities_models.Post.Visibilities.local_only: "public",
}
preferred_posting_language = None
if identity.config_identity.preferred_posting_language != "":
preferred_posting_language = (
identity.config_identity.preferred_posting_language
)

return cls.parse_obj(
{
"posting:default:visibility": visibility_mapping[
identity.config_identity.default_post_visibility
],
"posting:default:sensitive": False,
"posting:default:language": None,
"posting:default:language": preferred_posting_language,
"reading:expand:media": "default",
"reading:expand:spoilers": identity.config_identity.expand_content_warnings,
}
Expand Down
2 changes: 2 additions & 0 deletions api/views/statuses.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def post_status(request, details: PostStatusSchema) -> schemas.Status:
reply_to=reply_post,
attachments=attachments,
question=details.poll.dict() if details.poll else None,
language=details.language,
)
# Add their own timeline event for immediate visibility
TimelineEvent.add_post(request.identity, post)
Expand Down Expand Up @@ -141,6 +142,7 @@ def edit_status(request, id: str, details: EditStatusSchema) -> schemas.Status:
sensitive=details.sensitive,
attachments=attachments,
attachment_attributes=details.media_attributes,
language=details.language,
)
return schemas.Status.from_post(post)

Expand Down
22 changes: 22 additions & 0 deletions core/ld.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import os
import re
import urllib.parse as urllib_parse

from dateutil import parser
Expand Down Expand Up @@ -692,3 +693,24 @@ def media_type_from_filename(filename):
return "image/webp"
else:
return "application/octet-stream"


def get_language(data) -> str | None:
"""Detects and returns a document's language"""
map_ = None
if "contentMap" in data:
map_ = data["contentMap"]
elif "nameMap" in data:
map_ = data["nameMap"]
elif "summaryMap" in data:
map_ = data["summaryMap"]

if not map_:
return None

lang = list(map_.keys())[0]
if not lang or lang == "und":
return None

lang = re.split("-|_", lang)[0]
return lang.lower()
1 change: 1 addition & 0 deletions core/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ class IdentityOptions(pydantic.BaseModel):
visible_reaction_counts: bool = True
expand_content_warnings: bool = False
boosts_on_profile: bool = True
preferred_posting_language: str = ""

class DomainOptions(pydantic.BaseModel):
site_name: str = ""
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ httpx~=0.23
markdown_it_py~=2.1.0
pillow~=9.3.0
psycopg~=3.1.8
pycountry~=22.3.5
pydantic~=1.10.2
pyld~=2.0.3
pylibmc~=1.6.3
Expand Down
2 changes: 1 addition & 1 deletion templates/activities/_post.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
</div>
{% endif %}

<div class="content {% if post.summary %}hidden {% endif %}">
<div class="content {% if post.summary %}hidden {% endif %}"{% if post.language %} lang="{{ post.language }}"{% endif %}>
{{ post.safe_content_local }}

{% if post.attachments.exists %}
Expand Down
3 changes: 3 additions & 0 deletions tests/activities/models/test_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def test_content_map(remote_identity):
create=True,
)
assert post.content == "Hi World"
assert post.language == ""

post2 = Post.by_ap(
data={
Expand All @@ -271,6 +272,7 @@ def test_content_map(remote_identity):
create=True,
)
assert post2.content == "Hey World"
assert post2.language == ""

post3 = Post.by_ap(
data={
Expand All @@ -283,6 +285,7 @@ def test_content_map(remote_identity):
create=True,
)
assert post3.content == "Hello World"
assert post3.language == "en"


@pytest.mark.django_db
Expand Down
40 changes: 39 additions & 1 deletion tests/core/test_ld.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dateutil.tz import tzutc

from core.ld import parse_ld_date
from core.ld import get_language, parse_ld_date


def test_parse_ld_date():
Expand Down Expand Up @@ -41,3 +41,41 @@ def test_parse_ld_date():
tzinfo=tzutc(),
)
assert difference.total_seconds() == 0


def test_get_language():
    """Check map precedence, tag normalisation and the no-language cases."""
    content_map = {
        "en": "<p>Hello</p>",
        "es": "<p>hola</p>",
    }
    name_map = {"de": "Hallo"}
    summary_map = {"fr": "Bonjour"}

    # Precedence: contentMap wins over nameMap, which wins over summaryMap.
    all_maps = {
        "contentMap": content_map,
        "nameMap": name_map,
        "summaryMap": summary_map,
    }
    assert get_language(all_maps) == "en"

    without_content = {"nameMap": name_map, "summaryMap": summary_map}
    assert get_language(without_content) == "de"

    summary_only = {"summaryMap": summary_map}
    assert get_language(summary_only) == "fr"

    # Region subtags and letter case are normalised away.
    for raw_tag in ("en-gb", "en_GB", "EN"):
        document = {"contentMap": {raw_tag: "<p>Hello</p>"}}
        assert get_language(document) == "en"

    # Undetermined language and missing maps yield no language at all.
    assert get_language({"contentMap": {"und": "<p>Hello</p>"}}) is None
    assert get_language({}) is None
25 changes: 24 additions & 1 deletion users/views/settings/posting.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pycountry

from activities.models.post import Post
from users.views.settings.settings_page import SettingsPage

Expand All @@ -15,8 +17,29 @@ class PostingPage(SettingsPage):
"title": "Expand content warnings",
"help_text": "If content warnings should be expanded by default (not honoured by all clients)",
},
"preferred_posting_language": {
"title": "Default posting language",
"help_text": "",
"choices": sorted(
(
[
("", ""),
]
+ [
(lang.alpha_2, lang.name)
for lang in pycountry.languages
if hasattr(lang, "alpha_2")
]
),
key=lambda lang: lang[1],
),
},
}

layout = {
"Posting": ["default_post_visibility", "expand_content_warnings"],
"Posting": [
"default_post_visibility",
"expand_content_warnings",
"preferred_posting_language",
],
}
8 changes: 6 additions & 2 deletions users/views/settings/settings_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class SettingsPage(FormView):
options_class = Config.IdentityOptions
template_name = "settings/settings.html"
section: ClassVar[str]
options: dict[str, dict[str, str | int]]
options: dict[str, dict[str, str | int | list[tuple[int | str, str]]]]
layout: dict[str, list[str]]

def get_form_class(self):
Expand All @@ -42,7 +42,11 @@ def get_form_class(self):
elif config_field.type_ is UploadedImage:
form_field = forms.ImageField
elif config_field.type_ is str:
if details.get("display") == "textarea":
choices = details.get("choices")
if choices:
field_kwargs["widget"] = forms.Select(choices=choices)
form_field = forms.CharField
elif details.get("display") == "textarea":
form_field = partial(
forms.CharField,
widget=forms.Textarea,
Expand Down