diff --git a/package-lock.json b/package-lock.json index ed46ade..c95e97f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,14 @@ { - "name": "open-backend", + "name": "backend", + "version": "2.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { + "version": "2.0.1", "dependencies": { "@prisma/client": "^6.4.1", + "cheerio": "^1.0.0", "dotenv": "^16.4.7", "jsonwebtoken": "^9.0.2", "prom-client": "^15.1.3", @@ -2588,6 +2591,12 @@ "node": ">=18" } }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, "node_modules/brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", @@ -2834,6 +2843,48 @@ "node": ">=8" } }, + "node_modules/cheerio": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0.tgz", + "integrity": "sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww==", + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "encoding-sniffer": "^0.2.0", + "htmlparser2": "^9.1.0", + "parse5": "^7.1.2", + "parse5-htmlparser2-tree-adapter": "^7.0.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^6.19.5", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=18.17" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/chokidar": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz", @@ -3228,6 +3279,34 @@ "uncrypto": "^0.1.3" } }, + "node_modules/css-select": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", + "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/db0": { "version": "0.3.1", "resolved": "https://registry.npmjs.org/db0/-/db0-0.3.1.tgz", @@ -3399,6 +3478,61 @@ "node": ">=8" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dot-prop": { "version": "9.0.0", "resolved": "https://registry.npmjs.org/dot-prop/-/dot-prop-9.0.0.tgz", @@ -3489,6 +3623,19 @@ "node": ">= 0.8" } }, + "node_modules/encoding-sniffer": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.0.tgz", + "integrity": "sha512-ju7Wq1kg04I3HtiYIOrUrdfdDvkyO9s5XM8QAj/bN61Yo/Vb4vgJxy5vi4Yxk01gWHbrofpPtpxM8bKger9jhg==", + "license": "MIT", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, "node_modules/end-of-stream": { "version": "1.4.4", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", @@ -3498,6 +3645,18 @@ "once": "^1.4.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/error-stack-parser-es": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/error-stack-parser-es/-/error-stack-parser-es-1.0.5.tgz", @@ -4653,6 +4812,25 @@ "dev": true, "license": "MIT" }, + "node_modules/htmlparser2": { + "version": "9.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz", + "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "entities": "^4.5.0" + } + }, "node_modules/http-cache-semantics": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz", @@ -4735,7 +4913,6 @@ "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "dev": true, "license": "MIT", "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" @@ -5922,6 +6099,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/nypm": { "version": "0.6.0", "resolved": "https://registry.npmjs.org/nypm/-/nypm-0.6.0.tgz", @@ -6148,6 +6337,55 @@ "node": ">=6" } }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "license": "MIT", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.0.tgz", + "integrity": "sha512-aKstq2TDOndCn4diEyp9Uq/Flu2i1GlLkc6XIDQSDMuaFE3OPW5OphLCyQ5SpSJZTb4reN+kTcYru5yIfXoRPw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -6844,7 +7082,6 @@ "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true, "license": "MIT" }, "node_modules/scule": { @@ -7665,6 +7902,15 @@ "@types/estree": "^1.0.0" } }, + "node_modules/undici": { + "version": "6.21.3", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.3.tgz", + "integrity": "sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==", + "license": "MIT", + "engines": { + "node": ">=18.17" + } + }, "node_modules/undici-types": { "version": "6.20.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", @@ -8019,6 +8265,27 @@ "dev": true, "license": "MIT" }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/whatwg-url": { "version": "14.2.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.2.0.tgz", diff --git a/package.json b/package.json index 6dc88e3..e0ba9f2 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ }, "dependencies": { "@prisma/client": "^6.4.1", + "cheerio": "^1.0.0", "dotenv": "^16.4.7", "jsonwebtoken": "^9.0.2", "prom-client": "^15.1.3", diff --git a/server/routes/letterboxd/index.get.ts b/server/routes/letterboxd/index.get.ts new file mode 100644 index 0000000..fe4a390 --- /dev/null +++ b/server/routes/letterboxd/index.get.ts @@ -0,0 +1,147 @@ +import * as cheerio from 'cheerio'; +import { TMDB } from 'tmdb-ts'; +const tmdb = new TMDB(useRuntimeConfig().tmdbApiKey); + +export default defineCachedEventHandler(async (event) => { + try { + const response = await fetch('https://letterboxd.com/lists/'); + let html = await response.text(); + + html = html.replace(/\\n/g, '\n').replace(/\\t/g, '\t'); + + const $ = cheerio.load(html); + + const listItems = $('a.list-link').map((i, el) => ({ + href: $(el).attr('href'), + title: $(el).find('.list-name').text().trim() || $(el).attr('title'), + text: $(el).text().trim() + })).get(); + + if (!listItems.length) { + return { + lists: [], + error: 'No lists found' + }; + } + + const allLists = []; + + for (let i = 0; i < listItems.length; i++) { + const listItem = listItems[i]; + + if (!listItem.href) continue; + + try { + const listUrl = `https://letterboxd.com${listItem.href}`; + const listResponse = await fetch(listUrl); + let listHtml = await listResponse.text(); + + listHtml = listHtml.replace(/\\n/g, '\n').replace(/\\t/g, '\t'); + + const list$ = cheerio.load(listHtml); + + const ogTitle = list$('meta[property="og:title"]').attr('content'); + const listName = ogTitle || listItem.title; + + const listStatsText = list$('.list-meta .stats').text(); + const itemCountMatch = listStatsText.match(/(\d+)\s*film/i); + const expectedItemCount = itemCountMatch ? parseInt(itemCountMatch[1]) : null; + + const possibleFilmSelectors = [ + 'li.poster-container', + '.poster-container', + '.film-poster', + '.poster', + 'li[data-film-slug]', + '[data-film-slug]', + '.listitem', + '.list-item' + ]; + + let films = []; + let workingSelector = ''; + + for (const selector of possibleFilmSelectors) { + const elements = list$(selector); + if (elements.length > 0) { + workingSelector = selector; + films = elements.map((i, el) => { + const filmSlug = list$(el).attr('data-film-slug'); + const targetLink = list$(el).attr('data-target-link'); + const filmId = list$(el).attr('data-film-id'); + + const filmName = filmSlug + ? filmSlug.split('-').map(word => + word.charAt(0).toUpperCase() + word.slice(1) + ).join(' ') + : targetLink?.split('/film/')[1]?.split('/')[0]?.replace(/-/g, ' '); + + return { + name: filmName, + slug: filmSlug, + link: targetLink, + filmId: filmId + }; + }).get().filter(film => film.name); + + if (films.length > 0) break; + } + } + + const tmdbIds = []; + + for (const film of films) { + try { + const searchResult = await tmdb.search.movies({ query: film.name }); + + if (searchResult.results && searchResult.results.length > 0) { + const tmdbId = searchResult.results[0].id; + tmdbIds.push(tmdbId); + } + } catch (error) { + continue; + } + } + + allLists.push({ + listName: listName, + listUrl: listUrl, + tmdbIds, + metadata: { + originalFilmCount: films.length, + foundTmdbIds: tmdbIds.length, + expectedItemCount: expectedItemCount, + workingSelector + } + }); + + } catch (error) { + allLists.push({ + listName: listItem.title, + listUrl: `https://letterboxd.com${listItem.href}`, + tmdbIds: [], + metadata: { + originalFilmCount: 0, + foundTmdbIds: 0, + expectedItemCount: null, + error: 'Failed to process list' + } + }); + } + } + + return { + lists: allLists, + totalLists: allLists.length, + summary: { + totalTmdbIds: allLists.reduce((sum, list) => sum + list.tmdbIds.length, 0), + totalExpectedItems: allLists.reduce((sum, list) => sum + (list.metadata.expectedItemCount || 0), 0) + } + }; + } catch (error) { + throw createError({ + statusCode: 500, + statusMessage: 'Failed to fetch Letterboxd lists' + }); + } +}); \ No newline at end of file