Lấy link ảnh từ một file html

Source code: lấy link ảnh từ một trang của VnExpress. Code chỉ mang tính chất tham khảo.

	package hoasen.edu.vn;


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;

import android.app.Activity;
import android.os.Bundle;
import android.util.Log;
import android.webkit.WebView;
import android.widget.ProgressBar;
import android.widget.TextView;
import android.widget.Toast;


/**
 * Activity that downloads the page whose address is passed in the
 * {@code "URL"} intent extra, keeps only the article section that starts at
 * the {@code <H2 class=Lead>} line, renders that fragment in a
 * {@link WebView} and shows one toast per {@code src} link found in it.
 *
 * <p>The download and the string processing run on a worker thread; only the
 * final rendering and the toasts run on the UI thread. Doing the HTTP request
 * inside {@code onCreate} blocks the UI and is rejected with
 * {@code NetworkOnMainThreadException} on API level 11 and later.
 */
public class LoadContent extends Activity {

	/** Prefix of the line that opens the article lead on the downloaded page. */
	private static final String LEAD_OPEN = "<H2 class=Lead>";

	/**
	 * Root-relative {@code /Files} path. The look-behind skips occurrences that
	 * are already part of a longer path or of an absolute URL, so those are not
	 * prefixed a second time.
	 */
	private static final Pattern ROOT_FILES_PATH = Pattern.compile("(?<![\\w.-])/Files");

	/**
	 * Opening or closing {@code H2} tag name. Restricted to tags so that "H2"
	 * inside article text or image file names is left alone.
	 */
	private static final Pattern H2_TAG = Pattern.compile("(</?)H2\\b");

	/** A {@code src} attribute whose value is double-quoted, single-quoted or unquoted. */
	private static final Pattern SRC_ATTRIBUTE =
			Pattern.compile("src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))");

	/** Links taken from the {@code src} attributes of the last rendered article. */
	private List<String> images = new ArrayList<String>();
	TextView tv;
	WebView webview;
	String url = "";
	ProgressBar loadingProgressBar,loadingTitle;
	/** Cleaned article HTML that was last handed to the WebView. */
	private String temp = "";

	/**
	 * Inflates the layout, configures the WebView and starts the background
	 * download of the article named by the {@code "URL"} intent extra.
	 *
	 * @param savedInstanceState state bundle forwarded to the superclass
	 */
	@Override
	protected void onCreate(Bundle savedInstanceState) {
		super.onCreate(savedInstanceState);
		setContentView(R.layout.webview);

		url = getIntent().getStringExtra("URL");

		tv = (TextView) findViewById(R.id.pc);
		webview = (WebView) findViewById(R.id.webview);
		webview.getSettings().setJavaScriptEnabled(true);
		webview.getSettings().setJavaScriptCanOpenWindowsAutomatically(true);

		final String pageUrl = url;
		new Thread(new Runnable() {
			@Override
			public void run() {
				try {
					final String html = cleanArticleHtml(fetchArticleHtml(pageUrl));
					final List<String> links = extractImageLinks(html);
					runOnUiThread(new Runnable() {
						@Override
						public void run() {
							showArticle(html, links);
						}
					});
				} catch (final Exception exp) {
					// Boundary catch: a missing/invalid URL surfaces as a runtime
					// exception from the HTTP client, network failures as IOException.
					Log.e("LoadContent", "Failed to load " + pageUrl, exp);
					runOnUiThread(new Runnable() {
						@Override
						public void run() {
							showError(exp);
						}
					});
				}
			}
		}, "LoadContent-fetch").start();
	}

	/**
	 * Returns an empty string.
	 *
	 * @return always {@code ""}
	 */
	public String getContent(){
		return "";
	}

	/** Renders the cleaned article and reports every extracted link; must run on the UI thread. */
	private void showArticle(String html, List<String> links) {
		if (isFinishing()) {
			return; // the activity was closed while the download was running
		}
		temp = html;
		images.clear();
		images.addAll(links);

		webview.loadData("<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"><body>" + html + "</body></html>", "text/html", "utf-8");

		for (String link : images) {
			System.out.println("IMAGES TAG :" + link);
			Toast.makeText(getApplicationContext(), link, Toast.LENGTH_LONG).show();
		}
	}

	/** Shows the failure to the user as a toast; must run on the UI thread. */
	private void showError(Exception exp) {
		if (isFinishing()) {
			return;
		}
		Toast.makeText(getApplicationContext(), exp.toString(), Toast.LENGTH_LONG).show();
	}

	/**
	 * Downloads {@code pageUrl} and returns the raw article section of the page.
	 * The reader and the HTTP connection are always released.
	 *
	 * @param pageUrl address of the page to download
	 * @return the concatenated article lines, or {@code ""} when no lead line exists
	 * @throws IOException if the request or reading the response fails
	 */
	private static String fetchArticleHtml(String pageUrl) throws IOException {
		HttpClient client = new DefaultHttpClient();
		try {
			HttpResponse response = client.execute(new HttpGet(pageUrl));
			// Decode explicitly as UTF-8: the fragment is later rendered as UTF-8.
			BufferedReader rd = new BufferedReader(
					new InputStreamReader(response.getEntity().getContent(), "UTF-8"));
			try {
				return readArticleBody(rd);
			} finally {
				rd.close();
			}
		} finally {
			client.getConnectionManager().shutdown();
		}
	}

	/**
	 * Collects the lines from the first {@code <H2 class=Lead>} line up to, but
	 * not including, the first following line that contains {@code </div>}.
	 * Collection also stops when a second lead line is reached.
	 *
	 * @param rd reader positioned at the start of the page
	 * @return the collected lines joined without separators
	 * @throws IOException if reading fails
	 */
	private static String readArticleBody(BufferedReader rd) throws IOException {
		StringBuilder body = new StringBuilder();
		int leadCount = 0;
		String line;
		while ((line = rd.readLine()) != null) {
			boolean isLead = line.startsWith(LEAD_OPEN);
			if (isLead) {
				leadCount++;
			}
			if (leadCount == 0) {
				continue; // still before the article
			}
			if (leadCount > 1) {
				break; // nothing after a second lead is ever collected
			}
			if (isLead) {
				// NOTE(review): the lead line ends up in the buffer twice (here and
				// below); cleanArticleHtml then removes the first copy. Kept as in
				// the original because the intended rendering is unclear - confirm.
				body.append(line);
			}
			if (line.contains("</div>")) {
				break;
			}
			body.append(line);
		}
		return body.toString();
	}

	/**
	 * Trims trailing tables/scripts from the raw article, removes the span from
	 * the first {@code <H2} to the first {@code </H2>}, makes root-relative
	 * {@code /Files} paths absolute and demotes remaining {@code H2} tags to
	 * {@code H4}. Every step is skipped when its marker is absent instead of
	 * failing.
	 *
	 * @param raw article section as returned by {@link #readArticleBody}
	 * @return HTML fragment ready to be placed inside a {@code <body>}
	 */
	private static String cleanArticleHtml(String raw) {
		String body = raw;

		boolean endsCleanly = raw.lastIndexOf("</STRONG></P>") != -1
				&& raw.indexOf("<script") == -1;
		if (!endsCleanly) {
			// Drop a trailing table that follows the last paragraph.
			int table = raw.lastIndexOf("<TABLE");
			if (table > raw.lastIndexOf("</P>")) {
				body = raw.substring(0, table);
			}
			// Drop everything from the first script on, if one is still present.
			int script = body.indexOf("<script");
			if (script != -1) {
				body = body.substring(0, script);
			}
		}

		int headingOpen = body.indexOf("<H2");
		int headingClose = body.indexOf("</H2>");
		if (headingOpen != -1 && headingClose > headingOpen) {
			// The closing tag itself is kept, exactly as before.
			body = body.substring(0, headingOpen) + body.substring(headingClose);
		}

		body = ROOT_FILES_PATH.matcher(body).replaceAll("http://vnexpress.net/Files");
		body = H2_TAG.matcher(body).replaceAll("$1H4");
		return body;
	}

	/**
	 * Returns the value of every {@code src} attribute in {@code html}, in
	 * document order. Attributes without a usable value are skipped.
	 *
	 * @param html HTML fragment to scan
	 * @return the extracted links; empty when there are none
	 */
	private static List<String> extractImageLinks(String html) {
		List<String> links = new ArrayList<String>();
		Matcher m = SRC_ATTRIBUTE.matcher(html);
		while (m.find()) {
			// Exactly one of the three alternatives captured the value.
			String link = null;
			for (int g = 1; g <= m.groupCount() && link == null; g++) {
				link = m.group(g);
			}
			links.add(link);
		}
		return links;
	}
}
	
		
Advertisements

9 thoughts on “Lấy link ảnh từ một file html”

  1. ralph lauren outlet

    Excellent beat ! I would like to apprentice while you amend your web site, how could i subscribe for a blog web site? The account aided me a acceptable deal. I have been tiny bit acquainted of this your broadcast provided vivid transparent idea

    Reply
  2. backlinking service

    certainly like your web site but you need to take a look at the spelling on several of your posts. A number of them are rife with spelling issues and I to find it very troublesome to inform the truth nevertheless I will certainly come back again.

    Reply
  3. The jewelry man

    Great work! That is the kind of info that are meant to be shared around the internet. Disgrace on Google for now not positioning this put up higher! Come on over and consult with my web site . Thank you =)

    Reply
  4. Fashion Earring

    Its such as you read my thoughts! You appear to understand so much about this, such as you wrote the e book in it or something. I feel that you can do with some % to force the message home a bit, however instead of that, this is excellent blog. A great read. I will certainly be back.

    Reply

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s