Skip to content

Commit ff9b1a9

Browse files
LegendaryLinuxqwint
authored and committed
WebHost: Add robots.txt to WebHost (ArchipelagoMW#3157)
* Add a `robots.txt` file to prevent crawlers from scraping the site
* Added `ASSET_RIGHTS` entry to config.yaml to control whether `/robots.txt` is served or not
* Always import robots.py, determine config in route function
* Finish writing a comment
* Remove unnecessary redundant import and config
1 parent f24a265 commit ff9b1a9

File tree

5 files changed

+44
-5
lines changed

5 files changed

+44
-5
lines changed

WebHost.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ def get_app():
2323
from WebHostLib import register, cache, app as raw_app
2424
from WebHostLib.models import db
2525

26-
register()
2726
app = raw_app
2827
if os.path.exists(configpath) and not app.config["TESTING"]:
2928
import yaml
@@ -34,6 +33,7 @@ def get_app():
3433
app.config["HOST_ADDRESS"] = Utils.get_public_ipv4()
3534
logging.info(f"HOST_ADDRESS was set to {app.config['HOST_ADDRESS']}")
3635

36+
register()
3737
cache.init_app(app)
3838
db.bind(**app.config["PONY"])
3939
db.generate_mapping(create_tables=True)

WebHostLib/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
app.config["MAX_ROLL"] = 20
5252
app.config["CACHE_TYPE"] = "SimpleCache"
5353
app.config["HOST_ADDRESS"] = ""
54+
app.config["ASSET_RIGHTS"] = False
5455

5556
cache = Cache()
5657
Compress(app)
@@ -82,6 +83,6 @@ def register():
8283

8384
from WebHostLib.customserver import run_server_process
8485
# to trigger app routing picking up on it
85-
from . import tracker, upload, landing, check, generate, downloads, api, stats, misc
86+
from . import tracker, upload, landing, check, generate, downloads, api, stats, misc, robots
8687

8788
app.register_blueprint(api.api_endpoints)

WebHostLib/robots.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from WebHostLib import app
2+
from flask import abort
3+
from . import cache
4+
5+
6+
# NOTE(review): Flask-Caching's documentation places @cache.cached() between
# @app.route and the function definition. In the order used here the route
# appears to register the uncached function, so the cache is likely unused
# for this view. Confirm before swapping: the cache backend would then have
# to store the static-file response.
@cache.cached()
@app.route('/robots.txt')
def robots():
    """Serve the crawler-blocking robots.txt unless ASSET_RIGHTS is set.

    Returns the static ``robots.txt`` file when ``app.config["ASSET_RIGHTS"]``
    is falsy; otherwise aborts the request with a 404.
    """
    # Send 404 if the host has affirmed this to be the official WebHost
    if app.config["ASSET_RIGHTS"]:
        abort(404)

    # If this host is not official, do not allow search engine crawling
    return app.send_static_file('robots.txt')

WebHostLib/static/robots.txt

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
User-agent: Googlebot
2+
Disallow: /
3+
4+
User-agent: APIs-Google
5+
Disallow: /
6+
7+
User-agent: AdsBot-Google-Mobile
8+
Disallow: /
9+
10+
User-agent: AdsBot-Google
11+
Disallow: /
12+
13+
User-agent: Mediapartners-Google
14+
Disallow: /
15+
16+
User-agent: Google-Safety
17+
Disallow: /
18+
19+
User-agent: *
20+
Disallow: /

docs/webhost configuration sample.yaml

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# This is a sample configuration for the Web host.
1+
# This is a sample configuration for the Web host.
22
# If you wish to change any of these, rename this file to config.yaml
33
# Default values are shown here. Uncomment and change the values as desired.
44

@@ -25,7 +25,7 @@
2525

2626
# Secret key used to determine important things like cookie authentication of room/seed page ownership.
2727
# If you wish to deploy, uncomment the following line and set it to something not easily guessable.
28-
# SECRET_KEY: "Your secret key here"
28+
# SECRET_KEY: "Your secret key here"
2929

3030
# TODO
3131
#JOB_THRESHOLD: 2
@@ -38,7 +38,7 @@
3838
# provider: "sqlite"
3939
# filename: "ap.db3" # This MUST be the ABSOLUTE PATH to the file.
4040
# create_db: true
41-
41+
4242
# Maximum number of players that are allowed to be rolled on the server. After this limit, one should roll locally and upload the results.
4343
#MAX_ROLL: 20
4444

@@ -50,3 +50,7 @@
5050

5151
# Host Address. This is the address encoded into the patch that will be used for client auto-connect.
5252
#HOST_ADDRESS: archipelago.gg
53+
54+
# Asset redistribution rights. If true, the host affirms they have been given explicit permission to redistribute
55+
# the proprietary assets in WebHostLib
56+
#ASSET_RIGHTS: false

0 commit comments

Comments
 (0)