mirror of
https://github.com/kremalicious/blog.git
synced 2024-11-22 09:56:51 +01:00
robots.txt, sitemap plugin
This commit is contained in:
parent
66c5a7c269
commit
fad55872a9
311
_src/_plugins/sitemap_generator.rb
Normal file
311
_src/_plugins/sitemap_generator.rb
Normal file
@ -0,0 +1,311 @@
|
|||||||
|
# Sitemap.xml Generator is a Jekyll plugin that generates a sitemap.xml file by
|
||||||
|
# traversing all of the available posts and pages.
|
||||||
|
#
|
||||||
|
# How To Use:
|
||||||
|
# 1) Copy source file into your _plugins folder within your Jekyll project.
|
||||||
|
# 2) Modify the url variable in _config.yml to reflect your domain name.
|
||||||
|
# 3) Run Jekyll: jekyll --server to re-generate your site.
|
||||||
|
#
|
||||||
|
# Variables:
|
||||||
|
# * Change SITEMAP_FILE_NAME if you want your sitemap to be called something
|
||||||
|
# other than sitemap.xml.
|
||||||
|
# * Change the PAGES_INCLUDE_POSTS list to include any pages that are looping
|
||||||
|
# through your posts (e.g. "index.html", "archive.html", etc.). This will
|
||||||
|
# ensure that right after you make a new post, the last modified date will
|
||||||
|
# be updated to reflect the new post.
|
||||||
|
# * A sitemap.xml should be included in your _site folder.
|
||||||
|
# * If there are any files you don't want included in the sitemap, add them
|
||||||
|
# to the EXCLUDED_FILES list. The name should match the name of the source
|
||||||
|
# file.
|
||||||
|
# * If you want to include the optional changefreq and priority attributes,
|
||||||
|
# simply include custom variables in the YAML Front Matter of that file.
|
||||||
|
# The names of these custom variables are defined below in the
|
||||||
|
# CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME and PRIORITY_CUSTOM_VARIABLE_NAME
|
||||||
|
# constants.
|
||||||
|
#
|
||||||
|
# Notes:
|
||||||
|
# * The last modified date is determined by the latest from the following:
|
||||||
|
# system modified date of the page or post, system modified date of
|
||||||
|
# included layout, system modified date of included layout within that
|
||||||
|
# layout, ...
|
||||||
|
#
|
||||||
|
# Author: Michael Levin
|
||||||
|
# Site: http://www.kinnetica.com
|
||||||
|
# Distributed Under A Creative Commons License
|
||||||
|
# - http://creativecommons.org/licenses/by/3.0/
|
||||||
|
#
|
||||||
|
# Modified for Octopress by John W. Long
|
||||||
|
#
|
||||||
|
require 'rexml/document'
|
||||||
|
require 'fileutils'
|
||||||
|
|
||||||
|
module Jekyll
|
||||||
|
|
||||||
|
# Name of the generated sitemap file. Change it if the sitemap should be
# called something other than sitemap.xml.
SITEMAP_FILE_NAME = "sitemap.xml".freeze

# Source file names that must be left out of the sitemap.xml.
EXCLUDED_FILES = ["feed.xml"].freeze

# Source file names of pages that list posts. When a new post is added, the
# last modified date of these pages takes the newest post into account.
PAGES_INCLUDE_POSTS = ["index.html"].freeze

# YAML Front Matter keys read from pages or posts to fill the optional
# changefreq and priority elements of a sitemap entry.
CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME = "change_frequency".freeze
PRIORITY_CUSTOM_VARIABLE_NAME = "priority".freeze
|
||||||
|
|
||||||
|
# Sitemap helpers for posts. Reopens the Post class inside the enclosing
# module; `site` and `url` are expected to be provided by the class itself.
class Post
  attr_accessor :name

  # Returns the path of the post's source file, built from @base and @name.
  def full_path_to_source
    directory, file_name = @base, @name
    File.join(directory, file_name)
  end

  # Returns the public location of the post: the configured site url
  # followed by the post's url.
  def location_on_server
    base_url = site.config['url']
    "#{base_url}#{url}"
  end
end
|
||||||
|
|
||||||
|
# Sitemap helpers for pages. Reopens the Page class inside the enclosing
# module; `site` and `url` are expected to be provided by the class itself.
class Page
  attr_accessor :name

  # Returns the path of the page's source file, built from @base, @dir and
  # @name.
  def full_path_to_source
    File.join(@base, @dir, @name)
  end

  # Returns the public location of the page: the configured site url, the
  # page directory and the page url, with a trailing "index.html" removed so
  # directory indexes are listed by their directory URL.
  def location_on_server
    location = "#{site.config['url']}#{@dir}#{url}"
    # The dot is escaped and \z anchors at the very end of the string, so only
    # a literal trailing "index.html" is stripped (not e.g. "indexshtml").
    location.sub(/index\.html\z/, "")
  end
end
|
||||||
|
|
||||||
|
# Sitemap helper for layouts. Reopens the Layout class inside the enclosing
# module.
class Layout
  # Returns the path of the layout's source file, built from @base and @name.
  def full_path_to_source
    directory = @base
    file_name = @name
    File.join(directory, file_name)
  end
end
|
||||||
|
|
||||||
|
# Static file wrapper for the generated sitemap. Its write never fails:
# this recovers from a strange exception raised when starting the server
# without --auto.
class SitemapFile < StaticFile
  # Delegates to StaticFile#write and deliberately ignores any StandardError
  # it raises.
  #
  # Returns true in every case.
  def write(dest)
    super(dest)
    true
  rescue
    true
  end
end
|
||||||
|
|
||||||
|
class SitemapGenerator < Generator
|
||||||
|
|
||||||
|
# Valid values allowed by sitemap.xml spec for change frequencies.
# Frozen so the list cannot be altered by accident at runtime.
VALID_CHANGE_FREQUENCY_VALUES = %w[always hourly daily weekly monthly yearly never].freeze
|
||||||
|
|
||||||
|
# Goes through pages and posts and generates the sitemap.xml file inside
# the site's destination directory.
#
# site - the site being generated; its posts, pages, layouts and dest are
#        read, and the written sitemap is appended to site.static_files.
#
# Returns nothing
def generate(site)
  sitemap = REXML::Document.new
  sitemap << REXML::XMLDecl.new("1.0", "UTF-8")

  urlset = REXML::Element.new "urlset"
  urlset.add_attribute("xmlns",
    "http://www.sitemaps.org/schemas/sitemap/0.9")

  # Remembered so that pages listing posts can use the newest post date.
  @last_modified_post_date = fill_posts(site, urlset)
  fill_pages(site, urlset)

  sitemap.add_element(urlset)

  # File I/O: create sitemap.xml file and write out pretty-printed XML.
  # mkdir_p does nothing when the directory already exists, so no existence
  # check is needed (File.exists? is no longer available in Ruby 3.2+).
  FileUtils.mkdir_p(site.dest)

  formatter = REXML::Formatters::Pretty.new(4)
  formatter.compact = true
  # Block form closes the file even if writing raises.
  File.open(File.join(site.dest, SITEMAP_FILE_NAME), "w") do |file|
    formatter.write(sitemap, file)
  end

  # Keep the sitemap.xml file from being cleaned by Jekyll
  site.static_files << Jekyll::SitemapFile.new(site, site.dest, "/", SITEMAP_FILE_NAME)
end
|
||||||
|
|
||||||
|
# Adds a url element to urlset for every post that is not excluded and
# tracks the newest source-file modification time across all posts
# (excluded ones included).
#
# Returns the modification time of the most recently modified post, or nil
# when the site has no posts
def fill_posts(site, urlset)
  newest = nil

  site.posts.each do |post|
    urlset.add_element(fill_url(site, post)) unless excluded?(post.name)

    modified_at = File.mtime(post.full_path_to_source)
    newest = modified_at if newest.nil? || modified_at > newest
  end

  newest
end
|
||||||
|
|
||||||
|
# Create url elements for all the normal pages. Pages listed in
# EXCLUDED_FILES and pages whose source file does not exist on disk
# (presumably pages produced by other generators) are skipped.
#
# Returns nothing meaningful (the result of iterating site.pages)
def fill_pages(site, urlset)
  site.pages.each do |page|
    next if excluded?(page.name)
    # File.exist? replaces File.exists?, which is gone in Ruby 3.2+.
    next unless File.exist?(page.full_path_to_source)

    urlset.add_element(fill_url(site, page))
  end
end
|
||||||
|
|
||||||
|
# Fill data of each URL element: location, last modified,
# change frequency (optional), and priority (optional).
#
# The optional elements are taken from the page's or post's front matter
# data; an invalid value is reported on standard output and the element is
# left out.
#
# Returns url REXML::Element
def fill_url(site, page_or_post)
  url = REXML::Element.new "url"

  url.add_element(fill_location(page_or_post))

  lastmod = fill_last_modified(site, page_or_post)
  url.add_element(lastmod) if lastmod

  raw_frequency = page_or_post.data[CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME]
  if raw_frequency
    # to_s first: a front matter value is not necessarily a String (e.g. a
    # number), and such a value should be reported as invalid, not crash.
    change_frequency = raw_frequency.to_s.downcase

    if valid_change_frequency?(change_frequency)
      changefreq = REXML::Element.new "changefreq"
      changefreq.text = change_frequency
      url.add_element(changefreq)
    else
      puts "ERROR: Invalid Change Frequency In #{page_or_post.name}"
    end
  end

  priority_value = page_or_post.data[PRIORITY_CUSTOM_VARIABLE_NAME]
  if priority_value
    if valid_priority?(priority_value)
      priority = REXML::Element.new "priority"
      priority.text = priority_value
      url.add_element(priority)
    else
      puts "ERROR: Invalid Priority In #{page_or_post.name}"
    end
  end

  url
end
|
||||||
|
|
||||||
|
# Build the loc element for a page or post.
#
# Returns a loc REXML::Element whose text is the page's or post's
# location_on_server
def fill_location(page_or_post)
  REXML::Element.new("loc").tap do |element|
    element.text = page_or_post.location_on_server
  end
end
|
||||||
|
|
||||||
|
# Fill lastmod XML element with the last modified date for the page or post.
#
# The date is the newest modification time among the source file and its
# layouts. When a newest-post date has been recorded in
# @last_modified_post_date and the entry is listed in PAGES_INCLUDE_POSTS,
# that post date is taken into account as well.
#
# Returns lastmod REXML::Element
def fill_last_modified(site, page_or_post)
  source_mtime = File.mtime(page_or_post.full_path_to_source)
  newest = find_latest_date(source_mtime, site, page_or_post)

  # @last_modified_post_date is still nil while posts themselves are being
  # processed on the first run; after that only post-listing pages use it.
  if !@last_modified_post_date.nil? && posts_included?(page_or_post.name)
    newest = greater_date(newest, @last_modified_post_date)
  end

  element = REXML::Element.new "lastmod"
  element.text = newest.iso8601
  element
end
|
||||||
|
|
||||||
|
# Walk the chain of layouts used by the page/post (its layout, that
# layout's layout, ...) and pick the newest modification time.
#
# latest_date  - starting date, normally the source file's own mtime
# site         - provides the layouts lookup (name => layout)
# page_or_post - entry whose data["layout"] names the first layout
#
# Returns the latest of latest_date and the mtimes of all layouts in the chain
def find_latest_date(latest_date, site, page_or_post)
  available = site.layouts
  newest = latest_date
  current = available[page_or_post.data["layout"]]

  while current
    modified_at = File.mtime(current.full_path_to_source)
    newest = modified_at if modified_at > newest

    # Follow the chain to the layout this layout itself is wrapped in.
    current = available[current.data["layout"]]
  end

  newest
end
|
||||||
|
|
||||||
|
# Pick the later of two dates; date1 wins when both are equal.
#
# Returns latest of two dates
def greater_date(date1, date2)
  return date1 if date1 >= date2

  date2
end
|
||||||
|
|
||||||
|
# Is the given source file name listed in EXCLUDED_FILES?
#
# Returns boolean
def excluded?(name)
  EXCLUDED_FILES.any? { |file_name| file_name == name }
end
|
||||||
|
|
||||||
|
# Is the given source file name listed in PAGES_INCLUDE_POSTS?
#
# Returns boolean
def posts_included?(name)
  PAGES_INCLUDE_POSTS.member?(name)
end
|
||||||
|
|
||||||
|
# Is the given change frequency one of VALID_CHANGE_FREQUENCY_VALUES?
# The comparison is exact, so callers pass an already lower-cased value.
#
# Returns boolean
def valid_change_frequency?(change_frequency)
  VALID_CHANGE_FREQUENCY_VALUES.any? { |allowed| allowed == change_frequency }
end
|
||||||
|
|
||||||
|
# Is the priority value provided valid according to the spec, i.e. does it
# convert to a number between 0.0 and 1.0 inclusive?
#
# priority - the raw front matter value (String, number, or anything else)
#
# Returns boolean; values that cannot be converted to a Float are invalid
def valid_priority?(priority)
  priority_val = Float(priority)
  priority_val >= 0.0 && priority_val <= 1.0
rescue ArgumentError, TypeError
  # ArgumentError: non-numeric string; TypeError: value such as true or an
  # Array that Float() refuses to convert.
  false
end
|
||||||
|
end
|
||||||
|
end
|
@ -1,18 +1,36 @@
|
|||||||
---
|
---
|
||||||
layout: none
|
layout: nil
|
||||||
---
|
---
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
<channel>
|
|
||||||
<title>{{ site.name }}</title>
|
<title type="text" xml:lang="en">{{ site.name }}</title>
|
||||||
<description>{{ site.description }}</description>
|
<link type="application/atom+xml" href="{{ site.url }}/feed/" rel="self"/>
|
||||||
<link>{{ site.url }}</link>
|
<link type="text" href="{{ site.url }}" rel="alternate"/>
|
||||||
<atom:link href="{{ site.url }}/feed.xml" rel="self" type="application/rss+xml" />
|
<updated>{{ site.time | date_to_xmlschema }}</updated>
|
||||||
|
<id>{{ site.url }}</id>
|
||||||
|
<author>
|
||||||
|
<name>{{ site.author }}</name>
|
||||||
|
</author>
|
||||||
|
|
||||||
{% for post in site.posts limit:100 %}
|
{% for post in site.posts limit:100 %}
|
||||||
<item>
|
<entry>
|
||||||
<title>{{ post.title }}</title>
|
<title>{{ post.title }}</title>
|
||||||
<description>
|
|
||||||
|
{% if post.layout == "link" %}
|
||||||
|
<link>{{ post.linkurl }}</link>
|
||||||
|
{% else %}
|
||||||
|
<link>{{ site.url }}/{{ post.url }}</link>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<published>{{ post.date | date_to_xmlschema }}</published>
|
||||||
|
|
||||||
|
{% if post.moddate %}
|
||||||
|
<updated>{{ post.moddate | date_to_xmlschema }}</updated>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<id>{{ site.url }}/{{ post.url }}</id>
|
||||||
|
<content type="html">
|
||||||
{% if post.image %}
|
{% if post.image %}
|
||||||
<img src="{{ site.url }}/media/{{ post.image }}" />
|
<img src="{{ site.url }}/media/{{ post.image }}" />
|
||||||
{% endif %}
|
{% endif %}
|
||||||
@ -23,17 +41,7 @@ layout: none
|
|||||||
<a class="more-link" href="{{ post.linkurl }}">Go to source &#187;</a> <br />
|
<a class="more-link" href="{{ post.linkurl }}">Go to source &#187;</a> <br />
|
||||||
<a href="{{ site.url }}/{{ post.url }}" title="Permalink for this post">&#8734;</a>
|
<a href="{{ site.url }}/{{ post.url }}" title="Permalink for this post">&#8734;</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
</content>
|
||||||
</description>
|
</entry>
|
||||||
<pubDate>{{ post.date | date: "%a, %d %b %Y %H:%M:%S %z" }}</pubDate>
|
|
||||||
{% if post.layout == "link" %}
|
|
||||||
<link>{{ post.linkurl }}</link>
|
|
||||||
{% else %}
|
|
||||||
<link>{{ site.url }}/{{ post.url }}</link>
|
|
||||||
{% endif %}
|
|
||||||
<guid isPermaLink="true">{{ site.url }}/{{ post.url }}</guid>
|
|
||||||
</item>
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
</feed>
|
||||||
</channel>
|
|
||||||
</rss>
|
|
5
_src/robots.txt
Normal file
5
_src/robots.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
User-agent: *
|
||||||
|
Disallow: /search/
|
||||||
|
Disallow: /page/
|
||||||
|
|
||||||
|
Sitemap: http://kremalicious.com/sitemap.xml
|
Loading…
Reference in New Issue
Block a user