# geturls.py, from class of Tuesday, March 9, 2021

# -*- coding: utf-8 -*-
"""
Created on Tue Mar  9 09:57:10 2021

@author: rmontant
"""
import os, sys
import urllib.request

def process_urllist(l):
    """Fetch every URL in a list and save the decoded pages to one file.

    l is a list of strings.  l[0] is the path of the results file, which
    is created or overwritten.  The remaining items are URLs; empty
    strings among them are skipped.  An empty list is a no-op.

    For each URL, the URL itself is written on one line of the results
    file, followed by the decoded contents of the page.  Progress
    messages are printed to standard output.

    Exceptions raised by open() or urllib.request.urlopen() are not
    caught here and propagate to the caller.
    """
    if not l:
        # Not even a results-file name was supplied: nothing to do.
        return

    resultsfile = l[0]
    # The results file is written as utf-8 so that any decoded page can be
    # saved regardless of the platform's default encoding.
    with open(resultsfile, 'w', encoding='utf-8') as h:
        for url in l[1:]:
            if not url:
                continue

            # The context manager closes the response once the body is read.
            with urllib.request.urlopen(url) as rh:
                raw = rh.read()

            # Try the strictest codecs first.  latin-1 maps every possible
            # byte value, so it has to be last: anything listed after it
            # could never be reached.
            for decoder in ['ascii', 'utf-8', 'cp1252', 'latin-1']:
                try:
                    contents = raw.decode(decoder)
                except UnicodeDecodeError:
                    print(url, 'is not', decoder)
                else:
                    print('Used', decoder)
                    print(url, file=h)
                    print(contents, file=h)
                    # Stop at the first codec that works so the page is
                    # written once and no misleading messages follow.
                    break


def main(argv=[__name__]):
    """Ask for a directory of URL-list files and process every file in it.

    The user is prompted repeatedly until a directory that can be listed
    is entered; an empty answer abandons the program.  Each regular file
    in that directory is read as text, one stripped line per list item,
    and the resulting list is handed to process_urllist().

    argv is accepted for the command-line calling convention but is not
    used.

    Returns 1 if the user gives an empty directory name.  Otherwise the
    function falls off the end and returns None, which sys.exit() treats
    as a successful exit status.
    """
    print("you are in {}".format(os.getcwd()))
    while True:
        spec_dir = input('What directory? ')
        if not spec_dir:
            return 1    # abandon the program

        try:
            spec_files = os.listdir(spec_dir)
        except (OSError, ValueError):
            # OSError: missing, unreadable, or not a directory.
            # ValueError: the name contains an embedded null character.
            print('Bad directory name')
        else:
            break

    # Read every file before fetching anything, so a problem reading a
    # URL list shows up before any network traffic starts.
    all_urls = {}
    for urlfile in spec_files:
        filepath = os.path.join(spec_dir, urlfile)
        if not os.path.isfile(filepath):
            # os.listdir() also returns sub-directories, which cannot be
            # opened as text files.
            print('skipping {:s}'.format(filepath))
            continue
        print('opening {:s}'.format(filepath))
        with open(filepath, 'r') as h:
            urls = [line.strip() for line in h]
        all_urls[urlfile] = urls

    #
    # What follows looks like assignment "asn2"
    #
    for k, v in all_urls.items():
        print(k)
        process_urllist(v)  # fetch and save the URLs listed in this file

    print('main() is done.')

if __name__ == '__main__':
    # Run the interactive driver and hand its return value to the
    # interpreter as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)