|
3 | 3 | namespace App\Jobs\Data\Goodreads; |
4 | 4 |
|
5 | 5 | use App\Jobs\Base\BaseProcessingJob; |
| 6 | +use App\Models\EventObject; |
6 | 7 | use Carbon\Carbon; |
7 | 8 | use DOMDocument; |
8 | 9 | use DOMXPath; |
@@ -62,16 +63,20 @@ protected function process(): void |
62 | 63 |
|
63 | 64 | // Build target (book) |
64 | 65 | $bookTitle = $descriptionData['bookTitle'] ?? $parsedData['bookTitle'] ?? 'Unknown Book'; |
| 66 | + $bookUrl = $descriptionData['bookUrl'] ?? $link; |
| 67 | + $bookId = $this->extractBookId($bookUrl); |
| 68 | + |
65 | 69 | $target = [ |
66 | 70 | 'concept' => 'document', |
67 | 71 | 'type' => 'goodreads_book', |
68 | 72 | 'title' => $bookTitle, |
69 | 73 | 'content' => null, |
70 | 74 | 'metadata' => [ |
| 75 | + 'book_id' => $bookId, |
71 | 76 | 'author' => $descriptionData['authorName'] ?? null, |
72 | 77 | 'author_url' => $descriptionData['authorUrl'] ?? null, |
73 | 78 | ], |
74 | | - 'url' => $descriptionData['bookUrl'] ?? $link, |
| 79 | + 'url' => $bookUrl, |
75 | 80 | 'image_url' => $descriptionData['coverUrl'] ?? null, |
76 | 81 | 'time' => $pubDate ? Carbon::parse($pubDate) : now(), |
77 | 82 | ]; |
@@ -146,6 +151,47 @@ protected function process(): void |
146 | 151 | } |
147 | 152 | } |
148 | 153 |
|
/**
 * Create or update an object, deduplicating Goodreads books by book_id.
 *
 * When $objectData describes a `goodreads_book` that carries a
 * `metadata.book_id`, an existing EventObject with the same user, concept,
 * type and book_id is looked up and updated in place instead of creating a
 * second row. Every other case is delegated to the parent implementation.
 *
 * Update rules for an existing book:
 *  - the longer of the stored and incoming titles is kept (RSS titles may be truncated);
 *  - incoming metadata is merged over stored metadata, but incoming null
 *    values never erase a stored value;
 *  - content, url, media_url and embeddings fall back to the stored value
 *    when the incoming data does not provide one.
 *
 * @param  array  $objectData  Object attributes: concept, type, title, content,
 *                             metadata, url, image_url, time, embeddings.
 * @return EventObject The updated existing book, or the parent's result.
 */
protected function createOrUpdateObject(array $objectData): EventObject
{
    $bookId = $objectData['metadata']['book_id'] ?? null;

    // Only Goodreads books with a known book_id are deduplicated here;
    // a missing 'type' key falls through to the parent instead of raising.
    if (($objectData['type'] ?? null) !== 'goodreads_book' || $bookId === null) {
        return parent::createOrUpdateObject($objectData);
    }

    // book_id is a scalar, so use a JSON path equality comparison rather than
    // whereJsonContains (which is meant for membership in JSON arrays).
    $existingBook = EventObject::where('user_id', $this->integration->user_id)
        ->where('concept', $objectData['concept'] ?? null)
        ->where('type', $objectData['type'])
        ->where('metadata->book_id', (string) $bookId)
        ->first();

    if ($existingBook === null) {
        return parent::createOrUpdateObject($objectData);
    }

    // Keep the longer title (handles truncated titles from the RSS feed).
    // Cast to string so a null title never reaches mb_strlen().
    $newTitle = (string) ($objectData['title'] ?? '');
    $existingLength = mb_strlen((string) ($existingBook->title ?? ''));
    $titleToKeep = mb_strlen($newTitle) > $existingLength ? $newTitle : $existingBook->title;

    // Drop null entries from the incoming metadata so that a feed item without
    // author details does not wipe values learned from an earlier item.
    $incomingMetadata = array_filter(
        $objectData['metadata'] ?? [],
        static function ($value) {
            return $value !== null;
        }
    );

    $existingBook->update([
        'time' => $objectData['time'] ?? now(),
        'title' => $titleToKeep,
        'content' => $objectData['content'] ?? $existingBook->content,
        'metadata' => array_merge($existingBook->metadata ?? [], $incomingMetadata),
        'url' => $objectData['url'] ?? $existingBook->url,
        'media_url' => $objectData['image_url'] ?? $existingBook->media_url,
        'embeddings' => $objectData['embeddings'] ?? $existingBook->embeddings,
    ]);

    return $existingBook;
}
| 194 | + |
149 | 195 | /** |
150 | 196 | * Parse the RSS item title to extract action type and book information |
151 | 197 | */ |
@@ -316,4 +362,22 @@ private function getFullSizeCoverUrl(?string $url): ?string |
316 | 362 | // Remove size suffixes like _SX98_, _SY475_, etc. |
317 | 363 | return preg_replace('/\._[A-Z]{2}\d+_\./', '.', $url); |
318 | 364 | } |
| 365 | + |
/**
 * Extract the numeric book ID from a Goodreads book URL.
 *
 * Examples:
 *   https://www.goodreads.com/book/show/25792894-kings-rising      -> 25792894
 *   https://www.goodreads.com/book/show/2767052.The_Hunger_Games   -> 2767052
 *   https://www.goodreads.com/book/show/25792894?utm_source=rss    -> 25792894
 *   https://www.goodreads.com/book/show/25792894                   -> 25792894
 *
 * @param  string|null  $url  Goodreads book URL, or null/empty when unknown.
 * @return string|null The digits following /book/show/, or null when the URL
 *                     is empty or does not look like a book URL.
 */
private function extractBookId(?string $url): ?string
{
    if (! $url) {
        return null;
    }

    // The ID is the run of digits after /book/show/. It must be followed by a
    // slug separator ("-" or "."), a path/query/fragment delimiter, or the end
    // of the string, so query strings and dot-style slugs are accepted too.
    if (preg_match('~/book/show/(\d+)(?=[-./?#]|$)~', $url, $matches) === 1) {
        return $matches[1];
    }

    return null;
}
319 | 383 | } |
0 commit comments