Migrate to Hugo
This blog now runs on Hugo, with automatic deployment via Wercker, and is hosted on GitHub Pages. Take #2.
The previous blog was hosted on Google App Engine with a Python blog system; to recover the previous articles, I had to run a small data migration.
Make a backup from the GAE admin web interface: go to Datastore Admin and back up your entity (mine was Post) to Blobstore, then go to Blob Viewer and download the file, which will be named something like datastore_backup_datastore_backup_2015_08_02_Post-157413521680733022360302ADC43E4-output-1-attempt-1
Here is the migration code I used. It’s super ugly, but it helped me migrate from GAE to Hugo, so it may help you too.
I’ve used html2text to convert my HTML data back to Markdown.
import sys
import os
import json
import html2text
import errno
import datetime
sys.path.append('/usr/local/google_appengine')
from google.appengine.api.files import records
from google.appengine.datastore import entity_pb
from google.appengine.api import datastore
def mkdir_p(path):
    """Create directory ``path`` and any missing parents, like ``mkdir -p``.

    An already-existing directory is not an error. Any other ``OSError``
    raised by ``os.makedirs`` -- including ``path`` already existing as
    something that is not a directory -- is re-raised unchanged.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        # EEXIST is only benign when the existing entry really is a directory.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
# Layout of one generated Hugo post: a TOML front matter header between the
# ``+++`` fences, followed by the Markdown body. The title and the tag list
# are substituted already quoted (see toml_string / format_tags).
PAGE_TEMPLATE = (
    '+++\n'
    'date = "%s"\n'
    'title = %s\n'
    'tags = [%s]\n'
    '+++\n'
    '%s\n'
)


def format_utc_offset(offset):
    """Format a ``datetime.timedelta`` UTC offset as ``+HH:MM`` / ``-HH:MM``.

    The offset is rounded to the nearest minute. This matters because the
    offset is measured as ``now() - utcnow()``: the two clocks are read a few
    microseconds apart, so the raw difference is e.g. 1:59:59.99999 rather
    than 2:00:00, and truncating it would yield ``+01:59``.
    """
    total_minutes = int(round(offset.total_seconds() / 60.0))
    sign = '-' if total_minutes < 0 else '+'
    hours, minutes = divmod(abs(total_minutes), 60)
    return "%s%02d:%02d" % (sign, hours, minutes)


def local_utc_offset():
    """Return the current local UTC offset of this machine as ``+HH:MM``."""
    return format_utc_offset(
        datetime.datetime.now() - datetime.datetime.utcnow())


def toml_string(value):
    """Return ``value`` as a double-quoted string with quotes/backslashes escaped.

    JSON string escaping is used so that a title or tag containing ``"`` or
    ``\\`` does not break the generated front matter.
    """
    return json.dumps(value, ensure_ascii=False)


def format_tags(tags):
    """Return the comma-separated, quoted tag list; ``""`` when ``tags`` is None."""
    if tags is None:
        return ""
    return ",".join(toml_string(tag) for tag in tags)


def render_page(date, title, tags_string, content):
    """Build the full text of one post from its front matter fields and body."""
    return PAGE_TEMPLATE % (date, toml_string(title), tags_string, content)


def main():
    """Convert the datastore backup in ``datastore`` into Hugo Markdown posts.

    Reads the output path of each post from ``titles.txt`` and writes every
    entity whose ``status`` is 1 to ``post<path>.md``.
    """
    with open("titles.txt", 'r') as titles_file:
        titles = titles_file.readlines()

    # The offset does not change between records, so compute it once.
    # NOTE(review): this is the migrating machine's current UTC offset and it
    # is appended to every post's creation_date -- confirm that is intended.
    suffix = local_utc_offset()

    # The backup is a binary record file, so open it in binary mode.
    with open("datastore", 'rb') as raw:
        reader = records.RecordsReader(raw)
        # NOTE(review): the published listing lost its indentation; this
        # assumes titles.txt has one line per datastore record (whatever its
        # status). If it only lists status == 1 posts, advance the index
        # inside the status check instead.
        for index, record in enumerate(reader):
            entity_proto = entity_pb.EntityProto(contents=record)
            entity = datastore.Entity.FromPb(entity_proto)
            if entity.get("status") != 1:
                continue

            path = titles[index].rstrip()
            content = html2text.html2text(
                entity["content_html"], "http://blog.nobugware.com/")
            mkdir_p("post" + os.path.dirname(path))

            date = "%s%s" % (entity["creation_date"].isoformat(' '), suffix)

            tags = entity.get("tags")
            tags_string = format_tags(tags)
            if tags is not None:
                print(tags_string)

            page = render_page(date, entity["title"], tags_string, content)
            # Encoded bytes are written, hence binary mode.
            with open("post" + path + ".md", 'wb') as md:
                md.write(page.encode('utf8'))


if __name__ == "__main__":
    main()