# Views that render the sitemap index and the individual sitemap pages.
import datetime
from dataclasses import dataclass
from functools import wraps
from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.http import http_date
@dataclass
class SitemapIndexItem:
    """
    A single entry of the sitemap index: the URL of one sitemap page together
    with the modification stamp reported for that sitemap, if any.
    """

    # URL of the sitemap page this entry points to.
    location: str
    # Latest modification stamp of the sitemap (a date or a datetime), or None
    # when the sitemap does not report one. The annotation is quoted so that
    # the union syntax is never evaluated at import time.
    last_mod: "datetime.date | None" = None
def x_robots_tag(func):
    """
    Decorate a view so that every response it returns carries an
    ``X-Robots-Tag`` header set to "noindex, noodp, noarchive".

    The wrapped view is called with the original arguments; the returned
    response object must expose a mutable ``headers`` mapping.
    """

    def _tagged_view(request, *args, **kwargs):
        resp = func(request, *args, **kwargs)
        resp.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
        return resp

    # Copy the view's metadata (name, docstring, ...) onto the wrapper.
    return wraps(func)(_tagged_view)
def _get_latest_lastmod(current_lastmod, new_lastmod):
    """
    Return the more recent of two `lastmod` values.

    `new_lastmod` may be a date or a datetime; it is normalised to an aware
    datetime before being compared. `current_lastmod` may be None, in which
    case the normalised `new_lastmod` is returned as is.
    """
    candidate = new_lastmod
    if not isinstance(candidate, datetime.datetime):
        # A plain date is promoted to a datetime at midnight of that day.
        candidate = datetime.datetime.combine(candidate, datetime.time.min)
    if timezone.is_naive(candidate):
        # Naive values are interpreted as UTC.
        candidate = timezone.make_aware(candidate, datetime.timezone.utc)
    if current_lastmod is None:
        return candidate
    return max(current_lastmod, candidate)
@x_robots_tag
def index(
    request,
    sitemaps,
    template_name="sitemap_index.xml",
    content_type="application/xml",
    sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
    """
    Render the sitemap index: one entry per page of every section in
    `sitemaps`.

    `sitemaps` maps a section label to a sitemap object (or to a callable
    returning one). Each section's URL is reversed from `sitemap_url_name`
    and made absolute with the current site's domain; the protocol is the
    sitemap's own `protocol` unless that is None, in which case the request
    scheme is used. A Last-Modified header is sent only when every section
    reports a lastmod.
    """
    request_scheme = request.scheme
    current_site = get_current_site(request)

    entries = []  # index items for all sections, in iteration order
    every_section_has_lastmod = True
    newest = None
    for section, site in sitemaps.items():
        # Sections may be registered as classes/factories; instantiate them.
        if callable(site):
            site = site()
        if site.protocol is None:
            scheme = request_scheme
        else:
            scheme = site.protocol
        section_path = reverse(sitemap_url_name, kwargs={"section": section})
        base_url = "%s://%s%s" % (scheme, current_site.domain, section_path)
        section_lastmod = site.get_latest_lastmod()
        # Stop tracking the newest lastmod as soon as one section lacks it.
        if every_section_has_lastmod:
            if section_lastmod is None:
                every_section_has_lastmod = False
            else:
                newest = _get_latest_lastmod(newest, section_lastmod)
        # The first page is the bare section URL; later pages carry ?p=N.
        entries.append(SitemapIndexItem(base_url, section_lastmod))
        entries.extend(
            SitemapIndexItem("%s?p=%s" % (base_url, page_number), section_lastmod)
            for page_number in range(2, site.paginator.num_pages + 1)
        )

    # Only when all sections define lastmod is the header set, which lets
    # ConditionalGetMiddleware answer with 304 NOT MODIFIED.
    headers = None
    if every_section_has_lastmod and newest:
        headers = {"Last-Modified": http_date(newest.timestamp())}
    return TemplateResponse(
        request,
        template_name,
        {"sitemaps": entries},
        content_type=content_type,
        headers=headers,
    )
@x_robots_tag
def sitemap(
    request,
    sitemaps,
    section=None,
    template_name="sitemap.xml",
    content_type="application/xml",
):
    """
    Render one page of URLs for a single section, or for all sections when
    `section` is None.

    The page is read from the `p` query parameter (default 1). Raises
    Http404 when `section` is not a key of `sitemaps`, or when EmptyPage or
    PageNotAnInteger is raised while collecting a sitemap's URLs. A
    Last-Modified header is sent only when every rendered sitemap exposes a
    `latest_lastmod`.
    """
    request_scheme = request.scheme
    current_site = get_current_site(request)

    if section is None:
        selected = sitemaps.values()
    elif section in sitemaps:
        selected = [sitemaps[section]]
    else:
        raise Http404("No sitemap available for section: %r" % section)
    page = request.GET.get("p", 1)

    entries = []
    newest = None
    every_site_has_lastmod = True
    for site in selected:
        try:
            # Sitemaps may be registered as classes/factories; instantiate.
            if callable(site):
                site = site()
            entries.extend(
                site.get_urls(page=page, site=current_site, protocol=request_scheme)
            )
            # Stop tracking the newest lastmod once one sitemap lacks it.
            if every_site_has_lastmod:
                site_lastmod = getattr(site, "latest_lastmod", None)
                if site_lastmod is None:
                    every_site_has_lastmod = False
                else:
                    newest = _get_latest_lastmod(newest, site_lastmod)
        except EmptyPage:
            raise Http404("Page %s empty" % page)
        except PageNotAnInteger:
            raise Http404("No page '%s'" % page)

    # Only when all sitemaps define lastmod is the header set, which lets
    # ConditionalGetMiddleware answer with 304 NOT MODIFIED.
    headers = None
    if every_site_has_lastmod and newest:
        headers = {"Last-Modified": http_date(newest.timestamp())}
    return TemplateResponse(
        request,
        template_name,
        {"urlset": entries},
        content_type=content_type,
        headers=headers,
    )
|