import json
import os

from collections import defaultdict

import parse

import conf
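
# Note: this script assumes ``site`` (the Nikola site object) is already
# defined, e.g. because it is run inside ``nikola console``; ``conf`` is the
# site's conf.py.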


def clean_url(u):
    """Strip the scheme and the query string from a URL."""
    if "//" in u:
        return u.split("//", 1)[1].split("?", 1)[0]
    return u
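
# Example (illustrative URL):
#   clean_url("https://ralsina.me/foo.html?x=1") returns "ralsina.me/foo.html"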


BASE_URL = clean_url(conf.SITE_URL)
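
# The Disqus export, presumably converted from XML to JSON (attribute-style
# keys like "@dsq:id" come from that conversion).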
with open("disqus-data.json") as inf:
    data = json.load(inf)


# Each fix is a pair [parse pattern, format pattern]: extract fields from a
# broken thread link with the first, rebuild a canonical URL with the second.
thread_link_fixes = [
    [
        "{}://web.archive.org/web/{:d}/http://lateral.netmanagers.com.ar/{path}",
        "http://ralsina.me/{path}",
    ],
    ["{}://lateral.netmanagers.com.ar/{path}", "http://ralsina.me/{path}"],
    ["{}://localhost:8080/{path}", "http://ralsina.me/{path}"],
    [
        "http://feedproxy.google.com/~r/LateralOpinion/~3/{}/{path}",
        "http://ralsina.me/weblog/posts/{path}",
    ],
    [
        "https://disqus.com/home/discussion/lateralopinion/bb{numero1:d}_{numero2:d}/",
        "http://ralsina.me/weblog/posts/BB{numero1}.{numero2:02d}.html",
    ],
    ["{}://example.com/posts/{file}", "http://ralsina.me/weblog/posts/{file}"],
    ["{start}/bbs{id:d}", "{start}/BBS{id:d}"],  # bbs -> BBS
    [
        "{}://ralsina.me/tr/es/{path}",
        "http://ralsina.me/{path}",
    ],  # unify translation comments
    [
        "{}://ralsina.me/weblog/posts/{folder}/",
        "http://ralsina.me/weblog/posts/{folder}.html",
    ],  # unprettify URLs
    [
        "https://ralsina.me/weblog/posts/old-guy-the-terminal-ep-2-puede-fallar.html",
        "https://ralsina.me/weblog/posts/old-guy-the-terminal-ep-3-puede-fallar.html",
    ],
    ["https://ralsina.me/stories/nombres.html", "https://ralsina.me/stories/nombres/"],
]
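
# Illustration with a made-up URL: the localhost fix turns a development
# link into a live one.
#   parsed = parse.parse("{}://localhost:8080/{path}", "http://localhost:8080/weblog/posts/foo.html")
#   parsed.named == {"path": "weblog/posts/foo.html"}
#   "http://ralsina.me/{path}".format(**parsed.named) == "http://ralsina.me/weblog/posts/foo.html"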

# Ancient test threads
ignore_links = {
    "http://ralsina.me/weblog/posts/xxx.html",
    "http://ubuntuone.com/p/lKX/",
    "http://ralsina.me/weblog/posts/index.html",
}

# Collect all the post/page URLs in the site.
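# Keys are scheme-less, query-less permalinks (via clean_url), so thread
# links can be matched regardless of http vs https.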
site_urls = {}
for p in site.timeline:
    site_urls[clean_url(p.permalink(absolute=True))] = p

posts_per_thread = defaultdict(list)
posts_by_id = {}
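# Index the dump both ways: thread id -> its comments, comment id -> comment.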
for post in data["disqus"]["post"]:
    posts_per_thread[post["thread"]["@dsq:id"]].append(post)
    posts_by_id[post["@dsq:id"]] = post

threads = {}

for t in data["disqus"]["thread"]:
    if t["@dsq:id"] not in posts_per_thread:
        # Empty thread, don't care
        continue
    if t["link"] in ignore_links:
        print(f'Ignoring {t["link"]}')
        continue
    if clean_url(t["link"]) not in site_urls:
        for parser, formatter in thread_link_fixes:
            parsed = parse.parse(parser, t["link"])
            if parsed is not None:
                # Fixes can chain: the rewritten link is what later patterns see.
                t["link"] = formatter.format(**parsed.named)
                if clean_url(t["link"]) in site_urls:
                    break
        else:
            # Only reached if no fix produced a link that matches a site URL.
            print(f'Unfixed thread link: {t["link"]}')
    threads[t["@dsq:id"]] = t
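
# At this point threads holds only non-empty, non-ignored threads, with links
# rewritten to canonical form wherever a fix matched.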


def find_post_for_thread(thread, site):
    """Return the site post whose permalink matches the thread's link, ignoring the scheme."""
    link = thread["link"]
    for p in site.timeline:
        if p.permalink(absolute=True).split("//")[1] == link.split("//")[1]:
            return p
    return None


for t in threads:
    post = find_post_for_thread(threads[t], site)
    if post is None:
        print("Orphan thread ===>", threads[t])
        continue
    # Comment files live next to the post source: foo.rst -> foo.<id>.wpcomment
    base_path = os.path.splitext(post.source_path)[0]
    for comment in posts_per_thread[t]:
        if comment["isDeleted"] == "true":
            continue
        comment_path = f"{base_path}.{comment['@dsq:id']}.wpcomment"
        print(comment_path)
        with open(comment_path, "w") as outf:
            output = f""".. id: {comment['@dsq:id']}
.. approved: True
.. author: {comment['author']['name']}
.. date_utc: {comment['createdAt']}
.. compiler: html

{comment['message']}"""
            if "parent" in comment:
                parent = comment["parent"]["@dsq:id"].split("=")[-1]
                output = f".. parent_id: {parent}\n" + output
            outf.write(output)
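
# A generated .wpcomment file looks like this (illustrative values):
#
#   .. parent_id: 123456
#   .. id: 123457
#   .. approved: True
#   .. author: John Doe
#   .. date_utc: 2012-01-01T00:00:00
#   .. compiler: html
#
#   The comment's HTML message.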