summaryrefslogtreecommitdiff
path: root/scrape_mp3_links_from_URL.lua
blob: c33825c1ac2fce95a7c8f746183e9847a3a5fa98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/lua
--scrape_mp3_links_from_URL.lua
--Copyright (C) 2020-2021  Aleksei Kovura

--This program is free software: you can redistribute it and/or modify
--it under the terms of the GNU General Public License as published by
--the Free Software Foundation, either version 3 of the License, or
--(at your option) any later version.

--This program is distributed in the hope that it will be useful,
--but WITHOUT ANY WARRANTY; without even the implied warranty of
--MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--GNU General Public License for more details.

--You should have received a copy of the GNU General Public License
--along with this program.  If not, see <https://www.gnu.org/licenses/>.
local gumbo = require 'gumbo'
local http_request = require 'http.request'

-- Fetch the page whose URL is given as the first command-line argument and
-- print every mp3 link found in it to stdout, one link per line.
local url = arg[1]
if not url then
	-- Fail early with a usage hint instead of handing nil to the HTTP library.
	io.stderr:write('usage: ' .. (arg[0] or 'scrape_mp3_links_from_URL.lua') .. ' URL\n')
	os.exit(1)
end

local headers, stream = assert(http_request.new_from_uri(url):go())
-- The body is read before the status check because it doubles as the error
-- message for any non-200 response.
local html = assert(stream:get_body_as_string())
if headers:get(':status') ~= '200' then
	error(html)
end
local doc = gumbo.parse(html)

-- generic mp3 links
for _, el in ipairs(doc.links) do
	local href = el:getAttribute('href')
	-- Only absolute https:// links are reported. The pattern has no end
	-- anchor, so whatever follows the last ".mp3" in the href is dropped.
	local mp3link = href and string.match(href, '^https://.*%.mp3')
	if mp3link then print(mp3link) end
end

-- mp3 links from https://narodnye-pesni.ru
-- The match result serves both as the "is this that site?" check and as the
-- prefix that is prepended to each data-track value.
local pesniurl = string.match(url, '^https://narodnye%-pesni%.ru')
if pesniurl then
	io.stderr:write('matched https://narodnye-pesni.ru\n')
	for _, el in ipairs(doc:getElementsByTagName('div')) do
		if el:hasAttribute('data-track') then
			print(pesniurl .. el:getAttribute('data-track'))
		end
	end
end