1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
| import org.apache.commons.io.FileUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements;
import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; import java.util.List;
/**
 * Immutable pair describing one category: its display name and the URL of its listing page.
 */
class Category {

    private final String name;
    private final String url;

    /**
     * @param name display name of the category
     * @param url  address of the category page
     */
    public Category(String name, String url) {
        this.url = url;
        this.name = name;
    }

    /** @return the category's display name */
    public String getName() {
        return this.name;
    }

    /** @return the category's page address */
    public String getUrl() {
        return this.url;
    }

    /** Renders the name and the address on one line, separated by two tabs. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("分类名称: ").append(getName());
        text.append("\t\t");
        text.append("分类地址: ").append(getUrl());
        return text.toString();
    }
}
/**
 * Immutable description of a single PPT entry: preview image address, title, and the
 * address of its content page.
 */
class PPT {

    private final String imgUrl;
    private final String name;
    private final String url;

    /**
     * @param imgUrl address of the preview image
     * @param name   title of the entry
     * @param url    address of the entry's content page
     */
    public PPT(String imgUrl, String name, String url) {
        this.url = url;
        this.name = name;
        this.imgUrl = imgUrl;
    }

    /** @return the preview image address */
    public String getImgUrl() {
        return this.imgUrl;
    }

    /** @return the entry title */
    public String getName() {
        return this.name;
    }

    /** @return the content page address */
    public String getUrl() {
        return this.url;
    }

    /** Renders title, image address and content address on one line, separated by two tabs. */
    @Override
    public String toString() {
        String namePart = "名称: " + getName();
        String imagePart = "图片地址: " + getImgUrl();
        String contentPart = "内容地址: " + getUrl();
        return String.join("\t\t", namePart, imagePart, contentPart);
    }
}
/**
 * Immutable pair of download addresses for one file.
 */
class DownloadUrl {

    private final String one;
    private final String two;

    /**
     * @param one first download address
     * @param two second download address
     */
    public DownloadUrl(String one, String two) {
        this.two = two;
        this.one = one;
    }

    /** @return the first download address */
    public String getOne() {
        return this.one;
    }

    /** @return the second download address */
    public String getTwo() {
        return this.two;
    }

    /** Renders both addresses on one line, separated by two tabs. */
    @Override
    public String toString() {
        return String.format("下载地址一: %s\t\t下载地址二: %s", getOne(), getTwo());
    }
}
/**
 * Command-line crawler for {@code SERVER_URL}: reads the categories from the home page
 * navigation, walks every listing page of each category, and downloads the first mirror of
 * every listed entry into {@code ROOT_DIR/<category>/<name>.zip}.
 */
public class OnePPT {

    private static final String SERVER_URL = "http://www.1ppt.com";

    private static final String ROOT_DIR = "D:\\OnePPT\\";

    /** Connect and read timeout for file downloads, so a stalled server cannot hang the crawl. */
    private static final int DOWNLOAD_TIMEOUT_MILLIS = 30_000;

    /**
     * Runs the full crawl.
     *
     * <p>A failure while resolving or downloading a single entry is reported on
     * {@code System.err} and the crawl continues with the next entry. Failures while listing
     * the categories or the pages of a category still propagate.
     *
     * @param args ignored
     * @throws IOException        if the category list or a category listing cannot be fetched,
     *                            or a category directory cannot be created
     * @throws URISyntaxException never thrown; kept so the signature stays unchanged
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        for (Category category : requestCategory()) {
            System.out.println(category);

            String categoryDirName = toSafeFileName(category.getName());
            File categoryDir = new File(ROOT_DIR + categoryDirName);
            // mkdirs (not mkdir) so a missing ROOT_DIR is created as well; a failure is
            // surfaced instead of being silently ignored.
            if (!categoryDir.isDirectory() && !categoryDir.mkdirs()) {
                throw new IOException("Cannot create directory " + categoryDir);
            }

            // The last path segment of the category URL is what the pager links are built from.
            String[] segments = category.getUrl().split("/");
            String categoryKey = segments[segments.length - 1];

            for (PPT ppt : requestCategoryAll(category.getUrl(), categoryKey)) {
                System.out.println(ppt);
                try {
                    DownloadUrl downloadUrl =
                            requestDownloadUrls(requestDownloadPageUrl(ppt.getUrl()));
                    System.out.println(downloadUrl);

                    String filePath =
                            ROOT_DIR + categoryDirName + "\\" + toSafeFileName(ppt.getName()) + ".zip";
                    FileUtils.copyURLToFile(
                            new URL(downloadUrl.getOne()),
                            new File(filePath),
                            DOWNLOAD_TIMEOUT_MILLIS,
                            DOWNLOAD_TIMEOUT_MILLIS);
                    System.out.println("下载完成 => " + filePath);
                } catch (IOException e) {
                    // One broken entry must not abort the remaining downloads.
                    System.err.println("下载失败 => " + ppt.getName() + ": " + e);
                }
                System.out.println();
            }
        }
    }

    /**
     * Fetches the home page and reads the category links from its navigation bar.
     *
     * @return one {@link Category} per navigation link, in document order; empty if the
     *         navigation element is not found
     * @throws IOException if the home page cannot be fetched
     */
    public static List<Category> requestCategory() throws IOException {
        Document root = Jsoup.connect(SERVER_URL).get();
        Elements links = root.getElementsByClass("col_nav i_nav clearfix").select("ul li a");
        List<Category> categories = new ArrayList<>(links.size());
        for (Element link : links) {
            categories.add(new Category(link.attr("title"), SERVER_URL + link.attr("href")));
        }
        return categories;
    }

    /**
     * Fetches one listing page and returns the entries it shows.
     *
     * @param url address of the listing page
     * @return the entries on that page, in document order
     * @throws IOException if the page cannot be fetched
     */
    public static List<PPT> requestByCategory(String url) throws IOException {
        return parsePptList(Jsoup.connect(url).get());
    }

    /**
     * Fetches an entry's content page and returns the address of its download page.
     *
     * @param url address of the entry's content page
     * @return {@code SERVER_URL} followed by the href of the first download link
     * @throws IOException if the page cannot be fetched or has no {@code downurllist} element
     */
    public static String requestDownloadPageUrl(String url) throws IOException {
        Document root = Jsoup.connect(url).get();
        Elements lists = root.getElementsByClass("downurllist");
        if (lists.isEmpty()) {
            throw new IOException("No 'downurllist' element on " + url);
        }
        return SERVER_URL + lists.get(0).select("li a").attr("href");
    }

    /**
     * Fetches a download page and returns the mirror addresses listed on it.
     *
     * @param url address of the download page
     * @return the first two mirror addresses; the second is {@code null} when the page lists
     *         only one mirror
     * @throws IOException if the page cannot be fetched or lists no download link
     */
    public static DownloadUrl requestDownloadUrls(String url) throws IOException {
        Document root = Jsoup.connect(url).get();
        Elements lists = root.getElementsByClass("downloadlist");
        if (lists.isEmpty()) {
            throw new IOException("No 'downloadlist' element on " + url);
        }
        Elements links = lists.get(0).select("li a");
        if (links.isEmpty()) {
            throw new IOException("No download link on " + url);
        }
        String first = links.get(0).attr("href");
        String second = links.size() > 1 ? links.get(1).attr("href") : null;
        return new DownloadUrl(first, second);
    }

    /**
     * Fetches a listing page and returns the address of the page that follows it.
     *
     * @param url      address of the current listing page
     * @param category last path segment of the category URL
     * @return the next page's address, or {@code null} when there is no next page
     * @throws IOException if the page cannot be fetched
     */
    public static String requestNextPageUrl(String url, String category) throws IOException {
        return parseNextPageUrl(Jsoup.connect(url).get(), category);
    }

    /**
     * Collects the entries of every listing page of a category, following the pager until no
     * next page is offered. Each page is fetched once and used for both its entries and its
     * pager.
     *
     * @param url      address of the category's first listing page
     * @param category last path segment of the category URL
     * @return all entries of the category, in page order
     * @throws IOException if any listing page cannot be fetched
     */
    public static List<PPT> requestCategoryAll(String url, String category) throws IOException {
        List<PPT> data = new ArrayList<>();
        String currentUrl = url;
        while (currentUrl != null) {
            Document page = Jsoup.connect(currentUrl).get();
            data.addAll(parsePptList(page));
            currentUrl = parseNextPageUrl(page, category);
        }
        return data;
    }

    /** Extracts the entries from the {@code tplist} element of an already-fetched listing page. */
    private static List<PPT> parsePptList(Document page) {
        Elements items = page.getElementsByClass("tplist").select("li");
        List<PPT> result = new ArrayList<>(items.size());
        for (Element item : items) {
            Elements image = item.select("img");
            result.add(new PPT(
                    image.attr("src"),
                    image.attr("alt"),
                    SERVER_URL + item.select("a").attr("href")));
        }
        return result;
    }

    /** Extracts the next-page address from the pager of an already-fetched listing page, or null. */
    private static String parseNextPageUrl(Document page, String category) {
        Elements pagerItems = page.getElementsByClass("pages").select("li");
        // The "next page" link is looked up in the second-to-last pager item; a page without
        // a pager (or with a single item) simply has no next page.
        if (pagerItems.size() < 2) {
            return null;
        }
        Elements nextLink = pagerItems.get(pagerItems.size() - 2).select("a");
        if (!"下一页".equals(nextLink.text())) {
            return null;
        }
        // NOTE(review): the "/moban/" prefix is hard-coded; this presumably only holds for
        // categories that live under /moban/ - confirm against the site layout.
        return SERVER_URL + "/moban/" + category + "/" + nextLink.attr("href");
    }

    /** Replaces characters that Windows forbids in file names so scraped titles are usable in paths. */
    private static String toSafeFileName(String name) {
        String cleaned = name.replaceAll("[\\\\/:*?\"<>|]", "_").trim();
        return cleaned.isEmpty() ? "unnamed" : cleaned;
    }
}
|