Work on webmention code refactoring

This commit is contained in:
Jonny Barnes 2016-08-03 16:08:30 +01:00
parent 84c7969a4e
commit a9f089098c
8 changed files with 395 additions and 248 deletions

View file

@ -7,6 +7,8 @@ use Twitter;
use App\Tag;
use App\Note;
use Jonnybarnes\IndieWeb\Numbers;
use Illuminate\Filesystem\Filesystem;
use Jonnybarnes\WebmentionsParser\Authorship;
// Need to sort out Twitter and webmentions!
@ -23,8 +25,8 @@ class NotesController extends Controller
foreach ($notes as $note) {
$replies = 0;
foreach ($note->webmentions as $webmention) {
if ($webmention->type == 'reply') {
$replies = $replies + 1;
if ($webmention->type == 'in-reply-to') {
$replies++;
}
}
$note->replies = $replies;
@ -67,31 +69,51 @@ class NotesController extends Controller
public function singleNote($urlId)
{
$numbers = new Numbers();
$authorship = new Authorship();
$realId = $numbers->b60tonum($urlId);
$note = Note::find($realId);
$replies = [];
$reposts = [];
$likes = [];
foreach ($note->webmentions as $webmention) {
/*
reply->url |
reply->photo | Author
reply->name |
reply->source
reply->date
reply->reply
repost->url |
repost->photo | Author
repost->name |
repost->date
repost->source
like->url |
like->photo | Author
like->name |
*/
$microformats = json_decode($webmention->mf2);
$authorHCard = $authorship->findAuthor($microformats);
$content['url'] = $authorHCard['properties']['url'][0];
$content['photo'] = $this->createPhotoLink($authorHCard['properties']['photo'][0]);
$content['name'] = $authorHCard['properties']['name'][0];
switch ($webmention->type) {
case 'reply':
$content = unserialize($webmention->content);
$content['source'] = $this->bridgyReply($webmention->source);
$content['photo'] = $this->createPhotoLink($content['photo']);
case 'in-reply-to':
$content['source'] = $webmention->source;
$content['date'] = $carbon->parse($content['date'])->toDayDateTimeString();
$content['reply'] = $microformats['items'][0]['properties']['content'][0]['html_purified'];
$replies[] = $content;
break;
case 'repost':
$content = unserialize($webmention->content);
$content['photo'] = $this->createPhotoLink($content['photo']);
case 'repost-of':
$content['date'] = $carbon->parse($content['date'])->toDayDateTimeString();
$content['source'] = $webmention->source;
$reposts[] = $content;
break;
case 'like':
$content = unserialize($webmention->content);
$content['photo'] = $this->createPhotoLink($content['photo']);
case 'like-of':
$likes[] = $content;
break;
}
@ -164,41 +186,43 @@ class NotesController extends Controller
return view('taggednotes', ['notes' => $notes, 'tag' => $tag]);
}
/**
* Swap a brid.gy URL shim-ing a twitter reply to a real twitter link.
*
* @param string
* @return string
*/
public function bridgyReply($source)
{
$url = $source;
if (mb_substr($source, 0, 28, 'UTF-8') == 'https://brid-gy.appspot.com/') {
$parts = explode('/', $source);
$tweetId = array_pop($parts);
if ($tweetId) {
$url = 'https://twitter.com/_/status/' . $tweetId;
}
}
return $url;
}
/**
* Create the photo link.
*
* We shall leave twitter.com and twimg.com links as they are. Then we shall
* check for local copies, if that fails leave the link as is.
*
* @param string
* @return string
*/
public function createPhotoLink($url)
{
$host = parse_url($url)['host'];
if ($host != 'twitter.com' && $host != 'pbs.twimg.com') {
return '/assets/profile-images/' . $host . '/image';
}
if (mb_substr($url, 0, 20) == 'http://pbs.twimg.com') {
$host = parse_url($url, PHP_URL_HOST);
if ($host == 'pbs.twimg.com') {
//make sure we use HTTPS, we know twitter supports it
return str_replace('http://', 'https://', $url);
}
if ($host == 'twitter.com') {
if (Cache::has($url)) {
return Cache::get($url);
}
$username = parse_url($url, PHP_URL_PATH);
try {
$info = Twitter::getUsers(['screen_name' => $username]);
$profile_image = $info->profile_image_url_https;
Cache::put($url, $profile_image, 10080); //1 week
} catch (Exception $e) {
return $url; //not sure here
}
return $profile_image;
}
$filesystem = new Filesystem();
if ($filesystem->exists(public_path() . '/assets/profile-images/' . $host . '/image')) {
return '/assets/profile-images/' . $host . '/image';
}
return $url;
}
/**

View file

@ -13,13 +13,16 @@ use Illuminate\Queue\InteractsWithQueue;
use Jonnybarnes\WebmentionsParser\Parser;
use GuzzleHttp\Exception\RequestException;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\DispatchesJobs;
use App\Exceptions\RemoteContentNotFoundException;
class ProcessWebMention extends Job implements ShouldQueue
{
use InteractsWithQueue, SerializesModels;
use InteractsWithQueue, SerializesModels, DispatchesJobs;
protected $note;
protected $source;
protected $guzzle;
/**
* Create a new job instance.
@ -28,10 +31,11 @@ class ProcessWebMention extends Job implements ShouldQueue
* @param string $source
* @return void
*/
public function __construct(Note $note, $source)
public function __construct(Note $note, $source, Client $guzzle = null)
{
$this->note = $note;
$this->source = $source;
$this->guzzle = $guzzle ?? new Client();
}
/**
@ -46,100 +50,60 @@ class ProcessWebMention extends Job implements ShouldQueue
$baseURL = $sourceURL['scheme'] . '://' . $sourceURL['host'];
$remoteContent = $this->getRemoteContent($this->source);
if ($remoteContent === null) {
return false;
throw new RemoteContentNotFoundException;
}
$mf2Parser = new Mf2\Parser($remoteContent, $baseURL, true);
$microformats = $mf2Parser->parse();
$count = WebMention::where('source', '=', $this->source)->count();
if ($count > 0) {
//we already have a webmention from this source
$webmentions = WebMention::where('source', '=', $this->source)->get();
foreach ($webmentions as $webmention) {
//now check it still 'mentions' this target
//we switch for each type of mention (reply/like/repost)
switch ($webmention->type) {
case 'reply':
if ($parser->checkInReplyTo($microformats, $note->longurl) == false) {
//it doesn't so delete
$webmention->delete();
$microformats = Mf2\parse($remoteContent, $baseURL);
$webmentions = WebMention::where('source', $this->source)->get();
foreach ($webmentions as $webmention) {
//check webmention still references target
//we try each type of mention (reply/like/repost)
if ($webmention->type == 'in-reply-to') {
if ($parser->checkInReplyTo($microformats, $this->note->longurl) == false) {
//it doesn't so delete
$webmention->delete();
return true;
}
//webmenion is still a reply, so update content
$content = $parser->replyContent($microformats);
$this->saveImage($content);
$content['reply'] = $this->filterHTML($content['reply']);
$content = serialize($content);
$webmention->content = $content;
$webmention->save();
return;
}
//webmenion is still a reply, so update content
$microformats = $this->filterHTML($microformats);
$this->dispatch(new SaveProfileImage($microformats));
$webmention->mf2 = json_encode($microformats);
$webmention->save();
return true;
break;
case 'like':
if ($parser->checkLikeOf($microformats, $note->longurl) == false) {
//it doesn't so delete
$webmention->delete();
return;
}
if ($webmention->type == 'like-of') {
if ($parser->checkLikeOf($microformats, $note->longurl) == false) {
//it doesn't so delete
$webmention->delete();
return true;
} //note we don't need to do anything if it still is a like
break;
case 'repost':
if ($parser->checkRepostOf($microformats, $note->longurl) == false) {
//it doesn't so delete
$webmention->delete();
return;
} //note we don't need to do anything if it still is a like
}
if ($webmention->type == 'repost-of') {
if ($parser->checkRepostOf($microformats, $note->longurl) == false) {
//it doesn't so delete
$webmention->delete();
return;
} //again, we don't need to do anything if it still is a repost
}
}//foreach
return true;
} //again, we don't need to do anything if it still is a repost
break;
}//switch
}//foreach
}//if
//no wemention in db so create new one
$webmention = new WebMention();
//check it is in fact a reply
if ($parser->checkInReplyTo($microformats, $note->longurl)) {
$content = $parser->replyContent($microformats);
$this->saveImage($content);
$content['reply'] = $this->filterHTML($content['reply']);
$content = serialize($content);
$webmention->source = $this->source;
$webmention->target = $note->longurl;
$webmention->commentable_id = $this->note->id;
$webmention->commentable_type = 'App\Note';
$webmention->type = 'reply';
$webmention->content = $content;
$webmention->save();
$type = $parser->getMentionType($microformats); //throw error here?
$this->dispatch(new SaveProfileImage($microformats));
$microformats = $this->filterHTML($microformats);
$webmention->source = $this->source;
$webmention->target = $this->note->longurl;
$webmention->commentable_id = $this->note->id;
$webmention->commentable_type = 'App\Note';
$webmention->type = $type;
$webmention->mf2 = json_encode($microformats);
$webmention->save();
return true;
} elseif ($parser->checkLikeOf($microformats, $note->longurl)) {
//it is a like
$content = $parser->likeContent($microformats);
$this->saveImage($content);
$content = serialize($content);
$webmention->source = $this->source;
$webmention->target = $note->longurl;
$webmention->commentable_id = $this->note->id;
$webmention->commentable_type = 'App\Note';
$webmention->type = 'like';
$webmention->content = $content;
$webmention->save();
return true;
} elseif ($parser->checkRepostOf($microformats, $note->longurl)) {
//it is a repost
$content = $parser->repostContent($microformats);
$this->saveImage($content);
$content = serialize($content);
$webmention->source = $this->source;
$webmention->target = $note->longurl;
$webmention->commentable_id = $this->note->id;
$webmention->commentable_type = 'App\Note';
$webmention->type = 'repost';
$webmention->content = $content;
$webmention->save();
return true;
}
return;
}
/**
@ -150,16 +114,20 @@ class ProcessWebMention extends Job implements ShouldQueue
*/
private function getRemoteContent($url)
{
$client = new Client();
try {
$response = $client->request('GET', $url);
$response = $this->guzzle->request('GET', $url);
} catch (RequestException $e) {
return;
}
$html = (string) $response->getBody();
$path = storage_path() . '/HTML/' . $this->createFilenameFromURL($url);
$this->fileForceContents($path, $html);
$parts = explode('/', $path);
$name = array_pop($parts);
$dir = implode('/', $parts);
if (! is_dir($dir)) {
mkdir($dir, 0755, true);
}
file_put_contents("$dir/$name", $html);
return $html;
}
@ -182,65 +150,29 @@ class ProcessWebMention extends Job implements ShouldQueue
}
/**
* Save a file, and create any necessary folders.
* Filter the HTML in a reply webmention.
*
* @param string The directory to save to
* @param binary The file to save
* @param array The unfiltered microformats
* @return array The filtered microformats
*/
private function fileForceContents($dir, $contents)
private function filterHTML($microformats)
{
$parts = explode('/', $dir);
$name = array_pop($parts);
$dir = implode('/', $parts);
if (! is_dir($dir)) {
mkdir($dir, 0755, true);
if (isset($microformats['items'][0]['properties']['content'][0]['html'])) {
$microformats['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier(
$microformats['items'][0]['properties']['content'][0]['html']
);
}
file_put_contents("$dir/$name", $contents);
return $microformats;
}
/**
* Save a profile image to the local cache.
*
* @param array source content
* @return bool wether image was saved or not (we dont save twitter profiles)
*/
public function saveImage(array $content)
{
$photo = $content['photo'];
$home = $content['url'];
//dont save pbs.twimg.com links
if (parse_url($photo)['host'] != 'pbs.twimg.com'
&& parse_url($photo)['host'] != 'twitter.com') {
$client = new Client();
try {
$response = $client->get($photo);
$image = $response->getBody(true);
$path = public_path() . '/assets/profile-images/' . parse_url($home)['host'] . '/image';
$this->fileForceContents($path, $image);
} catch (Exception $e) {
// we are openning and reading the default image so that
// fileForceContent work
$default = public_path() . '/assets/profile-images/default-image';
$handle = fopen($default, 'rb');
$image = fread($handle, filesize($default));
fclose($handle);
$path = public_path() . '/assets/profile-images/' . parse_url($home)['host'] . '/image';
$this->fileForceContents($path, $image);
}
return true;
}
return false;
}
/**
* Purify HTML received from a webmention.
* Set up and use HTMLPurifer on some HTML.
*
* @param string The HTML to be processed
* @return string The processed HTML
*/
public function filterHTML($html)
private function useHTMLPurifier($html)
{
$config = HTMLPurifier_Config::createDefault();
$config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');

View file

@ -0,0 +1,67 @@
<?php
namespace App\Jobs;
use App\Jobs\Job;
use GuzzleHttp\Client;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\InteractsWithQueue;
use GuzzleHttp\Exception\RequestException;
use Illuminate\Contracts\Queue\ShouldQueue;
use Jonnybarnes\WebmentionsParser\Authorship;
use Jonnybarnes\WebmentionsParser\Exceptions\AuthorshipParserException;
class SaveProfileImage extends Job implements ShouldQueue
{
    use InteractsWithQueue, SerializesModels;

    /**
     * The parsed microformats of the webmention source.
     *
     * @var array
     */
    protected $microformats;

    /**
     * Create a new job instance.
     *
     * @param  array  $microformats  The parsed microformats of the webmention source
     * @return void
     */
    public function __construct($microformats)
    {
        $this->microformats = $microformats;
    }

    /**
     * Execute the job.
     *
     * Finds the author of the stored microformats and saves a copy of their
     * profile photo to public/assets/profile-images/{author host}/image.
     * Photos hosted on twitter.com or pbs.twimg.com are not saved. When the
     * photo cannot be downloaded the default profile image is saved instead.
     *
     * @param  \Jonnybarnes\WebmentionsParser\Authorship  $authorship
     * @return void
     */
    public function handle(Authorship $authorship)
    {
        try {
            // Use the microformats stored on the job; there is no local variable.
            $author = $authorship->findAuthor($this->microformats);
        } catch (AuthorshipParserException $e) {
            return;
        }

        // NOTE(review): assumes findAuthor() returns an h-card array whose
        // 'properties' are keyed by property name (as NotesController reads
        // it) -- confirm against the WebmentionsParser package.
        if (! is_array($author)) {
            return;
        }
        $photo = $author['properties']['photo'][0] ?? null;
        $home = $author['properties']['url'][0] ?? null;
        if (! is_string($photo) || ! is_string($home)) {
            return;
        }

        $photoHost = parse_url($photo, PHP_URL_HOST);
        // dont save pbs.twimg.com or twitter.com links
        if ($photoHost === 'pbs.twimg.com' || $photoHost === 'twitter.com') {
            return;
        }

        // Without a host for the author's URL there is no directory to save to.
        $homeHost = parse_url($home, PHP_URL_HOST);
        if (! $homeHost) {
            return;
        }

        $image = $photoHost ? $this->downloadImage($photo) : null;
        if ($image === null) {
            // the photo could not be fetched, so fall back to the default image
            $image = file_get_contents(public_path() . '/assets/profile-images/default-image');
            if ($image === false) {
                return;
            }
        }

        $dir = public_path() . '/assets/profile-images/' . $homeHost;
        if (! is_dir($dir)) {
            mkdir($dir, 0755, true);
        }
        file_put_contents($dir . '/image', $image);
    }

    /**
     * Download the raw contents of a remote image.
     *
     * @param  string  $url  The URL of the image
     * @return string|null  The image contents, or null when the request failed
     */
    private function downloadImage($url)
    {
        $client = new Client();
        try {
            $response = $client->get($url);
        } catch (RequestException $e) {
            return null;
        }

        // getBody() returns a stream; cast it to get the contents as a string
        return (string) $response->getBody();
    }
}