summaryrefslogtreecommitdiff
path: root/django/contrib/sitemaps/views.py
blob: 166563b200ab49bc68155fba80c18e1d3d9352c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import datetime
from dataclasses import dataclass
from functools import wraps
from typing import Optional

from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.http import http_date


@dataclass
class SitemapIndexItem:
    """
    One entry of a sitemap index: the URL of a sitemap page and, when known,
    the last modification date of the sitemap it belongs to.
    """

    # URL of the sitemap page. The `index` view builds it as
    # "<protocol>://<domain><path>", with "?p=<n>" appended for pages >= 2.
    location: str
    # Last modification date of the sitemap, or None when it is unknown.
    # datetime.datetime is a subclass of datetime.date, so this annotation
    # covers both dates and datetimes.
    last_mod: Optional[datetime.date] = None


def x_robots_tag(func):
    """
    Decorate a view so that every response it returns carries the
    ``X-Robots-Tag: noindex, noodp, noarchive`` header.

    The wrapped view must return an object exposing a mapping-like
    ``headers`` attribute.
    """
    header_name = "X-Robots-Tag"
    header_value = "noindex, noodp, noarchive"

    @wraps(func)
    def decorated(request, *args, **kwargs):
        result = func(request, *args, **kwargs)
        # Set the header on the way out; the response itself is passed
        # through unchanged otherwise.
        result.headers[header_name] = header_value
        return result

    return decorated


def _get_latest_lastmod(current_lastmod, new_lastmod):
    """
    Return the most recent of `current_lastmod` and `new_lastmod`.

    `new_lastmod` may be a date or a datetime. It is first normalized to an
    aware datetime: a plain date becomes midnight of that day, and a naive
    datetime is made aware in UTC. When `current_lastmod` is None, the
    normalized `new_lastmod` is returned as is.
    """
    if isinstance(new_lastmod, datetime.datetime):
        candidate = new_lastmod
    else:
        # A plain date has no time component; use the start of that day.
        candidate = datetime.datetime.combine(new_lastmod, datetime.time.min)
    if timezone.is_naive(candidate):
        candidate = timezone.make_aware(candidate, datetime.timezone.utc)
    if current_lastmod is None:
        return candidate
    return max(current_lastmod, candidate)


@x_robots_tag
def index(
    request,
    sitemaps,
    template_name="sitemap_index.xml",
    content_type="application/xml",
    sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
    """
    Render the sitemap index: one entry per page of every section found in
    `sitemaps`.

    `sitemaps` maps section labels to sitemap instances or to callables
    returning one. Each section's URL is reversed from `sitemap_url_name`
    with the section label as keyword argument. A ``Last-Modified`` header
    is added only when every section reports a last modification date.
    """
    scheme = request.scheme
    current_site = get_current_site(request)

    entries = []  # Index items for every page of every section.
    every_section_dated = True
    newest_lastmod = None
    for section, site in sitemaps.items():
        # Sections may be registered as classes/factories; instantiate them.
        if callable(site):
            site = site()
        protocol = site.protocol if site.protocol is not None else scheme
        section_path = reverse(sitemap_url_name, kwargs={"section": section})
        absolute_url = "%s://%s%s" % (protocol, current_site.domain, section_path)
        site_lastmod = site.get_latest_lastmod()
        if every_section_dated:
            if site_lastmod is None:
                # One undated section is enough to give up on the header.
                every_section_dated = False
            else:
                newest_lastmod = _get_latest_lastmod(newest_lastmod, site_lastmod)
        # The first page is the bare URL; later pages carry a ?p=N query.
        entries.append(SitemapIndexItem(absolute_url, site_lastmod))
        entries.extend(
            SitemapIndexItem("%s?p=%s" % (absolute_url, page_number), site_lastmod)
            for page_number in range(2, site.paginator.num_pages + 1)
        )

    # With a lastmod for all sections, expose it so that
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED.
    headers = None
    if every_section_dated and newest_lastmod:
        headers = {"Last-Modified": http_date(newest_lastmod.timestamp())}
    return TemplateResponse(
        request,
        template_name,
        {"sitemaps": entries},
        content_type=content_type,
        headers=headers,
    )


@x_robots_tag
def sitemap(
    request,
    sitemaps,
    section=None,
    template_name="sitemap.xml",
    content_type="application/xml",
):
    """
    Render the URLs of one sitemap section, or of all sections when
    `section` is None, for the page given by the ``p`` query parameter
    (default 1).

    Raise Http404 when `section` is not a key of `sitemaps`, or when a
    sitemap's `get_urls()` raises EmptyPage or PageNotAnInteger. A
    ``Last-Modified`` header is added only when every rendered sitemap
    exposes a non-None `latest_lastmod`.
    """
    scheme = request.scheme
    current_site = get_current_site(request)

    if section is not None and section not in sitemaps:
        raise Http404("No sitemap available for section: %r" % section)
    maps = sitemaps.values() if section is None else [sitemaps[section]]
    page = request.GET.get("p", 1)

    newest_lastmod = None
    every_site_dated = True
    urls = []
    for entry in maps:
        try:
            # Sections may be registered as classes/factories.
            site = entry() if callable(entry) else entry
            urls.extend(site.get_urls(page=page, site=current_site, protocol=scheme))
            if not every_site_dated:
                continue
            site_lastmod = getattr(site, "latest_lastmod", None)
            if site_lastmod is None:
                # One undated sitemap is enough to give up on the header.
                every_site_dated = False
            else:
                newest_lastmod = _get_latest_lastmod(newest_lastmod, site_lastmod)
        except EmptyPage:
            raise Http404("Page %s empty" % page)
        except PageNotAnInteger:
            raise Http404("No page '%s'" % page)

    # With a lastmod for all sites, expose it so that
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED.
    headers = None
    if every_site_dated and newest_lastmod:
        headers = {"Last-Modified": http_date(newest_lastmod.timestamp())}
    return TemplateResponse(
        request,
        template_name,
        {"urlset": urls},
        content_type=content_type,
        headers=headers,
    )