diff options
author | Aleksei <email@email.email> | 2022-07-14 19:40:24 +0300 |
---|---|---|
committer | Aleksei <email@email.email> | 2022-07-14 19:40:24 +0300 |
commit | cb3470437da60416b0e12b35afce92dd3f043efb (patch) | |
tree | 87411f68ac8e26ed4a3af53a91394e3a34918d00 /scrape_mp3_links_from_URL.lua |
Diffstat (limited to 'scrape_mp3_links_from_URL.lua')
-rwxr-xr-x | scrape_mp3_links_from_URL.lua | 40 |
1 files changed, 40 insertions, 0 deletions
#!/usr/bin/lua
--scrape_mp3_links_from_URL.lua
--Copyright (C) 2020-2021 Aleksei Kovura

--This program is free software: you can redistribute it and/or modify
--it under the terms of the GNU General Public License as published by
--the Free Software Foundation, either version 3 of the License, or
--(at your option) any later version.

--This program is distributed in the hope that it will be useful,
--but WITHOUT ANY WARRANTY; without even the implied warranty of
--MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
--GNU General Public License for more details.

--You should have received a copy of the GNU General Public License
--along with this program. If not, see <https://www.gnu.org/licenses/>.

--Usage: scrape_mp3_links_from_URL.lua URL
--
--Downloads URL, parses the response body as HTML and prints every mp3
--link it finds to stdout, one per line:
--  * generic: the href of each element in the document's `links`
--    collection, when it starts with "https://" and contains ".mp3"
--    (the match is cut right after the last ".mp3");
--  * narodnye-pesni.ru: when URL itself starts with
--    "https://narodnye-pesni.ru", the `data-track` attribute of every
--    <div> that has one, prefixed with that site root.
--Raises an error carrying the response body when the HTTP status is not 200.

local gumbo = require 'gumbo'
local http_request = require 'http.request'

-- Lua pattern for the site root; '-' and '.' are escaped because they are
-- magic characters in Lua patterns.
local PESNI_ROOT_PATTERN = '^https://narodnye%-pesni%.ru'
-- Greedy '.*' means the match extends to the LAST ".mp3" in the href.
local MP3_LINK_PATTERN = '^https://.*%.mp3'

local url = arg and arg[1]
if not url then
  -- Fail early with a readable message instead of an error from inside
  -- the HTTP library when no URL was given.
  local script = (arg and arg[0]) or 'scrape_mp3_links_from_URL.lua'
  io.stderr:write('usage: ' .. script .. ' URL\n')
  os.exit(1)
end

-- assert() turns a (nil, message) failure return into a raised error.
local headers, stream = assert(http_request.new_from_uri(url):go())
local html = assert(stream:get_body_as_string())
if headers:get ':status' ~= '200' then
  error(html)
end

local doc = assert(gumbo.parse(html))

-- generic mp3 links
for _, el in ipairs(doc.links) do
  local href = el:getAttribute('href')
  -- Guard against a missing href: string.match raises on a nil subject.
  local mp3link = href and string.match(href, MP3_LINK_PATTERN)
  if mp3link then print(mp3link) end
end

-- mp3 links from https://narodnye-pesni.ru
-- string.match returns the matched site root, or nil when URL belongs to
-- another site, so a single match serves as both the test and the prefix.
local pesniurl = string.match(url, PESNI_ROOT_PATTERN)
if pesniurl then
  io.stderr:write('matched https://narodnye-pesni.ru' .. '\n')
  for _, el in ipairs(doc:getElementsByTagName('div')) do
    if el:hasAttribute('data-track') then
      -- NOTE(review): assumes data-track holds a path relative to the
      -- site root (e.g. starting with '/') -- confirm against the site.
      print(pesniurl .. el:getAttribute('data-track'))
    end
  end
end