summaryrefslogtreecommitdiff
path: root/scrape_mp3_links_from_URL.lua
diff options
context:
space:
mode:
Diffstat (limited to 'scrape_mp3_links_from_URL.lua')
-rwxr-xr-xscrape_mp3_links_from_URL.lua40
1 files changed, 40 insertions, 0 deletions
diff --git a/scrape_mp3_links_from_URL.lua b/scrape_mp3_links_from_URL.lua
new file mode 100755
index 0000000..c33825c
--- /dev/null
+++ b/scrape_mp3_links_from_URL.lua
@@ -0,0 +1,40 @@
+#!/usr/bin/lua
+--scrape_mp3_links_from_URL.lua
+--Copyright (C) 2020-2021 Aleksei Kovura
+
+--This program is free software: you can redistribute it and/or modify
+--it under the terms of the GNU General Public License as published by
+--the Free Software Foundation, either version 3 of the License, or
+--(at your option) any later version.
+
+--This program is distributed in the hope that it will be useful,
+--but WITHOUT ANY WARRANTY; without even the implied warranty of
+--MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+--GNU General Public License for more details.
+
+--You should have received a copy of the GNU General Public License
+--along with this program. If not, see <https://www.gnu.org/licenses/>.
+local gumbo = require 'gumbo'
+local http_request = require 'http.request'
+
+-- Usage: scrape_mp3_links_from_URL.lua URL
+-- Fetches URL, parses the response body as HTML and prints every mp3 link
+-- found in it to stdout, one per line. Diagnostics go to stderr.
+local url = arg[1]
+if not url then
+  -- Fail early with a readable message instead of an error from the HTTP library.
+  io.stderr:write('usage: ' .. tostring(arg[0]) .. ' URL\n')
+  os.exit(1)
+end
+
+local headers, stream = assert(http_request.new_from_uri(url):go())
+local html = assert(stream:get_body_as_string())
+local status = headers:get ':status'
+if status ~= '200' then
+  -- The response body is kept in the message so the server's reply stays visible.
+  error('HTTP status ' .. tostring(status) .. ' for ' .. url .. '\n' .. html)
+end
+local doc = gumbo.parse(html)
+
+-- generic mp3 links: absolute https hrefs, cut after the last ".mp3"
+-- (the pattern is greedy and not anchored at the end, so a trailing query
+-- string is dropped from the printed link)
+for _, el in ipairs(doc.links) do
+  local mp3link = string.match(el:getAttribute('href'), '^https://.*%.mp3')
+  if mp3link then print(mp3link) end
+end
+
+-- mp3 links from https://narodnye-pesni.ru
+-- The captured prefix doubles as the test for "is this that site" and as the
+-- base that each data-track value is appended to (presumably a site-relative
+-- path; verify against the page markup).
+local pesniurl = string.match(url, '^https://narodnye%-pesni%.ru')
+if pesniurl then
+  io.stderr:write('matched https://narodnye-pesni.ru' .. '\n')
+  for _, el in ipairs(doc:getElementsByTagName('div')) do
+    if el:hasAttribute('data-track') then
+      print(pesniurl .. el:getAttribute('data-track'))
+    end
+  end
+end