Skip to main content

Ralsina.Me — Roberto Alsina's website

disqus2data.py (Source)

import json
import logging
from collections import defaultdict
import parse
import conf
def clean_url(u):
    """Normalize *u* for comparison: drop the scheme and the query string.

    A URL without "//" is assumed to already be scheme-less and is
    returned unchanged.
    """
    if "//" not in u:
        return u
    # Everything after the first "//", then everything before the first "?".
    rest = u.partition("//")[2]
    return rest.partition("?")[0]
# The site's base URL with scheme/query stripped (e.g. "ralsina.me/").
BASE_URL = clean_url(conf.SITE_URL)

# Load the Disqus export (already converted from XML to JSON).
# Use a context manager so the file handle is closed deterministically,
# and pin the encoding rather than relying on the platform default.
with open("disqus-data.json", encoding="utf-8") as _disqus_file:
    data = json.load(_disqus_file)

# Each fix is a parse pattern and a format pattern. Get data using one, set data using the other.
thread_link_fixes = [
    [
        "{}://web.archive.org/web/{:d}/http://lateral.netmanagers.com.ar/{path}",
        "http://ralsina.me/{path}",
    ],
    ["{}://lateral.netmanagers.com.ar/{path}", "http://ralsina.me/{path}"],
    ["{}://localhost:8080/{path}", "http://ralsina.me/{path}"],
    [
        "http://feedproxy.google.com/~r/LateralOpinion/~3/{}/{path}",
        "http://ralsina.me/weblog/posts/{path}",
    ],
    [
        "https://disqus.com/home/discussion/lateralopinion/bb{numero1:d}_{numero2:d}/",
        "http://ralsina.me/weblog/posts/BB{numero1}.{numero2:02d}.html",
    ],
    ["{}://example.com/posts/{file}", "http://ralsina.me/weblog/posts/{file}"],
    ["{start}/bbs{id:d}", "{start}/BBS{id:d}"],  # bbs -> BBS
    [
        "{}://ralsina.me/tr/es/{path}",
        "http://ralsina.me/{path}",
    ],  # unify translation comments
    [
        "{}://ralsina.me/weblog/posts/{folder}/",
        "http://ralsina.me/weblog/posts/{folder}.html",
    ],  # unprettify URLs
    [
        "https://ralsina.me/weblog/posts/old-guy-the-terminal-ep-2-puede-fallar.html",
        "https://ralsina.me/weblog/posts/old-guy-the-terminal-ep-3-puede-fallar.html",
    ],
    ["https://ralsina.me/stories/nombres.html", "https://ralsina.me/stories/nombres/"],
]
# Ancient test threads
ignore_links = {
    "http://ralsina.me/weblog/posts/xxx.html",
    "http://ubuntuone.com/p/lKX/",
    "http://ralsina.me/weblog/posts/index.html",
}
# Map every post/page URL on the site (scheme/query stripped) to its object.
site_urls = {clean_url(p.permalink(absolute=True)): p for p in site.timeline}
# Group Disqus comments by the thread they belong to, and index them by id.
posts_per_thread = defaultdict(list)
posts_by_id = {}
for comment in data["disqus"]["post"]:
    posts_per_thread[comment["thread"]["@dsq:id"]].append(comment)
    posts_by_id[comment["@dsq:id"]] = comment
# Build the set of threads worth keeping, repairing their links along the way.
threads = {}
for t in data["disqus"]["thread"]:
    if t["@dsq:id"] not in posts_per_thread:
        # Empty thread, don't care
        continue
    if t["link"] in ignore_links:
        print(f'Ignoring {t["link"]}')
        continue
    # If the thread's URL doesn't match any current site URL, try each
    # parse/format fix in order.  NOTE: fixes may chain — a matching fix
    # rewrites t["link"] in place even when the result still isn't a known
    # URL, so later patterns run against the already-rewritten link.
    if not clean_url(t["link"]) in site_urls:
        for parser, formatter in thread_link_fixes:
            parsed = parse.parse(parser, t["link"])
            if parsed is not None:
                t["link"] = formatter.format(**parsed.named)
                if clean_url(t["link"]) in site_urls:
                    break
        else:
            # for/else: the loop finished without break — no fix produced a
            # URL the site knows about; report it for manual follow-up.
            print(f'Unfixed thread link: {t["link"]}')
    threads[t["@dsq:id"]] = t
def find_post_for_thread(thread, site):
    """Return the site post whose permalink matches the thread's link.

    Links are compared with their scheme stripped (but the query string,
    if any, kept).  Returns None when no post matches.
    """
    target = thread['link'].split('//')[1]
    for candidate in site.timeline:
        if candidate.permalink(absolute=True).split('//')[1] == target:
            return candidate
    return None
# Write each surviving comment as a ".wpcomment" file next to its post's
# source file, in the format Nikola's comment importer expects.
for thread_id in threads:
    post = find_post_for_thread(threads[thread_id], site)
    if post is None:
        print('Orphan thread ===>', threads[thread_id])
        continue
    # NOTE(review): assumes the first "." in source_path starts the file
    # extension; a dot earlier in the path would truncate it — confirm.
    base_path = post.source_path.split('.')[0]
    # Renamed from "post" to "comment": the original shadowed the outer
    # `post` variable, which only worked by accident of statement order.
    for comment in posts_per_thread[thread_id]:
        if comment['isDeleted'] == 'true':
            # Skip comments that were deleted in Disqus.
            continue
        comment_path = f"{base_path}.{comment['@dsq:id']}.wpcomment"
        print(comment_path)
        output = f""".. id: {comment['@dsq:id']}
.. approved: True
.. author: {comment['author']['name']}
.. date_utc: {comment['createdAt']}
.. compiler: html
{comment['message']}"""
        if 'parent' in comment:
            # Disqus parent ids can look like "...=NNN"; keep only the tail.
            parent = comment['parent']['@dsq:id'].split('=')[-1]
            output = f".. parent_id: {parent}\n" + output
        # Explicit UTF-8: comment bodies routinely contain non-ASCII text,
        # and the platform default encoding is not guaranteed to handle it.
        with open(comment_path, "w", encoding="utf-8") as outf:
            outf.write(output)

Contents © 2000-2020 Roberto Alsina