Wednesday, March 6, 2019

JAVA - make code short and clean

Try to build small functions (utilities) to help shorten the code.
And also, avoid redundant work.

package com.XXX.XXX.XXX;
import com.XXX.XXX.XXX.utilities.NumericUtilities;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import java.util.Arrays;

/**
 * Turns free-text choke-size values into a number.
 *
 * <p>The text is first stripped of unit suffixes, quote marks, dashes and whitespace, then a few
 * known spellings are rewritten, and finally the result is interpreted as a marker word, a
 * fraction or a plain number.
 */
public class ChokeSizeReformatting {
    // Noise removed before parsing. "THS" must come before "TH": regex alternation takes the
    // first alternative that matches, so with "TH" first the input "64THS" was reduced to "64S"
    // and then rejected as non-numeric.
    private static final String INVALID_CHARACTER = "(THS)|(TH)|(in)|[`]|[']|[-]|[\"]|\\s";
    // Marker words that map to the value 1. They are matched AFTER whitespace has been removed,
    // so the "NO CHOKE" marker has to be spelled "NOCHOKE" here to ever match.
    private static final String[] IDENTIFIED_STRING = {"FUL", "NONE", "OPEN", "NOCHOKE", "FO"};
    // Literal rewrites applied after stripping; entry i of SPECIAL_CASE becomes entry i of
    // SPECIAL_CASE_REPLACEMENT.
    // NOTE(review): every "." is rewritten to "0.", so "1.5" becomes "10.5" - confirm that only
    // values with a leading dot (".5") are expected in the data.
    private static final String[] SPECIAL_CASE = {".", "ADJ", "\\"};
    private static final String[] SPECIAL_CASE_REPLACEMENT = {"0.", "64", "/"};

    /**
     * First step: basic cleaning of the raw value, then conversion to a number.
     *
     * <ul>
     *   <li>{@code null} input, or text that is not numeric after cleaning, yields {@code 0};</li>
     *   <li>text containing one of the marker words yields {@code 1};</li>
     *   <li>a fraction such as {@code 24/64} yields its decimal value;</li>
     *   <li>anything else is parsed as a plain number.</li>
     * </ul>
     *
     * @param rawChokeSize the choke size as found in the source data, may be {@code null}
     * @return the numeric choke size as described above
     * @throws NumberFormatException if the cleaned text passes {@code NumberUtils.isNumber} but
     *     is not accepted by {@link Double#parseDouble(String)}
     */
    public static double cleanChokeSizeString(String rawChokeSize) {
        if (rawChokeSize == null) {
            // Nothing to parse; treated the same as any other non-numeric value.
            return 0d;
        }

        String stripped = rawChokeSize.replaceAll(INVALID_CHARACTER, "");
        String candidate = StringUtils.replaceEach(stripped, SPECIAL_CASE, SPECIAL_CASE_REPLACEMENT);

        // logic from LA, TX, OK, CO completion
        if (Arrays.stream(IDENTIFIED_STRING).anyMatch(candidate::contains)) {
            return 1d;
        }
        if (NumericUtilities.isDivisible(candidate)) {
            return Double.parseDouble(NumericUtilities.fractionToDecimal(candidate));
        }
        if (!NumberUtils.isNumber(candidate)) {
            return 0d;
        }
        return Double.parseDouble(candidate);
    }
}







package com.xxxx.xxxx.xxxx.utilities;
import org.apache.commons.lang3.math.NumberUtils;

/**
 * String helpers for recognising and evaluating simple {@code numerator/denominator} fractions.
 */
public class NumericUtilities {

    /**
     * Checks whether {@code str} is a fraction that {@link #fractionToDecimal(String)} can
     * evaluate: text before and after the first {@code /} must both be numbers, and the
     * denominator must not be zero.
     *
     * @param str the text to inspect, may be {@code null}
     * @return {@code true} only when the text is a fraction with a non-zero denominator
     */
    public static boolean isDivisible(String str) {
        if (str == null) {
            return false;
        }
        int slash = str.indexOf('/');
        if (slash < 0) {
            return false;
        }
        String numerator = str.substring(0, slash);
        String denominator = str.substring(slash + 1);

        return NumberUtils.isNumber(numerator)
                && NumberUtils.isNumber(denominator)
                // isNumber also accepts forms such as "0x10" or "12L" that Double.parseDouble
                // rejects; toDouble returns the supplied default for those, so they are
                // filtered out here instead of blowing up in fractionToDecimal.
                && !Double.isNaN(NumberUtils.toDouble(numerator, Double.NaN))
                // Compare the denominator numerically so "0.0" and "00" are caught as well as
                // the literal "0".
                && NumberUtils.toDouble(denominator, 0d) != 0d;
    }

    /**
     * Evaluates a {@code numerator/denominator} string, splitting at the first {@code /}.
     *
     * @param str the fraction text; callers are expected to check it with
     *     {@link #isDivisible(String)} first
     * @return the quotient rendered with {@link String#valueOf(double)}
     * @throws StringIndexOutOfBoundsException if {@code str} contains no {@code /}
     * @throws NumberFormatException if either side is not parseable as a double
     */
    public static String fractionToDecimal(String str) {
        int slash = str.indexOf('/');
        double numerator = Double.parseDouble(str.substring(0, slash));
        double denominator = Double.parseDouble(str.substring(slash + 1));
        return String.valueOf(numerator / denominator);
    }
}

Wednesday, January 30, 2019

JAVA - Check if coordinates fall in the right county (WGS84)

This code uses the point-in-polygon concept: the US map is broken down into one polygon per county, and a given coordinate is checked to see whether it actually falls inside the county it is attributed to.

These are the pom.xml dependencies you need to add:

   <!-- Test-scoped: JUnit 4 runner used by the test class. -->
   <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!-- GeoTools modules.
             NOTE(review): gt-referencing and gt-epsg-hsql are pinned to 20.0 while gt-main and
             gt-shapefile below are pinned to 20.1; confirm whether the mix is intentional. -->
        <dependency>
            <groupId>org.geotools</groupId>
            <artifactId>gt-referencing</artifactId>
            <version>20.0</version>
        </dependency>
        <dependency>
            <groupId>org.geotools</groupId>
            <artifactId>gt-epsg-hsql</artifactId>
            <version>20.0</version>
        </dependency>
        <!-- Test-scoped: AssertJ assertions. -->
        <dependency>
            <groupId>org.assertj</groupId>
            <artifactId>assertj-core</artifactId>
            <version>3.11.1</version>
            <scope>test</scope>
        </dependency>
        <!-- JTS geometry types (org.locationtech.jts.*) imported by the class. -->
        <dependency>
            <groupId>org.locationtech.jts</groupId>
            <artifactId>jts-core</artifactId>
            <version>1.16.0</version>
        </dependency>
        <dependency>
            <groupId>org.geotools</groupId>
            <artifactId>gt-main</artifactId>
            <version>20.1</version>
        </dependency>
        <dependency>
            <groupId>org.geotools</groupId>
            <artifactId>gt-shapefile</artifactId>
            <version>20.1</version>
        </dependency>


This is the interface:

/**
 * Lookup contract through which CoordinateCountyMatchFlagWGS84 obtains the shape for a key.
 */
public interface CoordinateCountyMatchFlagWGS84LookupProvider {
    /**
     * Looks up the polygon registered under {@code key}.
     *
     * @param key lookup key; the caller in this file builds it as {@code county + '-' + state}
     * @return the polygon as text (the caller parses it with a WKT reader), or {@code null}
     *     when there is no entry for the key, as the ShapeProvider implementation does
     */
    String findPolygon(String key);
}

This is the class:

package com.xxxxx.xxxx.xxxx;

import java.io.File;
import java.util.HashMap;

import org.geotools.data.*;
import org.geotools.data.simple.SimpleFeatureSource;
import org.geotools.feature.FeatureCollection;
import org.geotools.feature.FeatureIterator;

import org.locationtech.jts.geom.Point;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.io.WKTReader;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.MultiPolygon;

import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;


public class CoordinateCountyMatchFlagWGS84 {
    private static CoordinateCountyMatchFlagWGS84LookupProvider provider;

    public CoordinateCountyMatchFlagWGS84(CoordinateCountyMatchFlagWGS84LookupProvider provider) {
        CoordinateCountyMatchFlagWGS84.provider = provider;
    }

    private String checkPolygon(String rawKey) {
        return provider.findPolygon(rawKey);
    }

    public Boolean getMatchFlag(String county, String state, Double lat, Double lng) throws Exception {
        String rawKey = county + '-' + state;
        WKTReader rd = new WKTReader();
        //Point
        GeometryFactory fac = new GeometryFactory();
        Point convertPoint = fac.createPoint(new Coordinate(lng, lat));
        Geometry point = rd.read(convertPoint.toString());
        //Shape
        if (checkPolygon(rawKey) != null) {
        /*The units of measurement is based on the underlying spatial reference.
        So, for example, if it is EPSG:4326(WGS84) it is decimal degrees.*/
            Geometry poly = rd.read(checkPolygon(rawKey)).buffer(2); //boundary allowance
            //return true or false
            return poly.contains(point);
        } else return null;
    }

    public static class ShapeProvider implements CoordinateCountyMatchFlagWGS84LookupProvider {
        private WKTReader reader = new WKTReader();
        private HashMap<String, MultiPolygon> County_GEOM = new HashMap<>();

        public ShapeProvider(String countyShapeFile) throws Exception {
            // Get county shape file
            String path = getClass().getResource(countyShapeFile).getPath();
            File file = new File(path);
            FileDataStore myData = FileDataStoreFinder.getDataStore(file);
            SimpleFeatureSource source = myData.getFeatureSource();

            // Get data information query
            SimpleFeatureType schema = source.getSchema();
            Query query = new Query(schema.getTypeName());

            // Get collection of features
            FeatureCollection<SimpleFeatureType, SimpleFeature> collection = source.getFeatures(query);
            FeatureIterator<SimpleFeature> features = collection.features();

            while (features.hasNext()) {
                //Get feature of each polygon
                SimpleFeature feature = features.next();
                String key = feature.getAttribute(1).toString() + "-" + feature.getAttribute(2).toString();
                String WKTString = feature.getAttribute(0).toString();
                MultiPolygon value = (MultiPolygon) reader.read(WKTString);
                County_GEOM.put(key, value);
            }
            features.close();
        }

        public String findPolygon(String key) {
            if (County_GEOM.keySet().contains(key)) {
                //return shape for the county-state key
                return County_GEOM.get(key).toString();
            } else return null;
        }
    }
}


This is the test for the class:

import com.xxxx.xxx.xxx.CoordinateCountyMatchFlagWGS84;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;


public class CoordinateCountyMatchFlagWGS84Test {

    private CoordinateCountyMatchFlagWGS84 lookup;

    private final String CORRECT_STATE = "Texas";
    private final String CORRECT_COUNTY = "Armstrong";
    private final String WRONG_COUNTY = "WrongCounty";
    private final Double CORRECT_LATITUDE = 34.8255435;
    private final Double CORRECT_LONGITUDE = -101.6004429;
    private final Double WRONG_LATITUDE = 39.148469;
    private final Double WRONG_LONGITUDE = -80.102385;

    @Before
    public void setup() throws Exception {

        lookup = new CoordinateCountyMatchFlagWGS84(new CoordinateCountyMatchFlagWGS84.ShapeProvider("/County_WGS84/County_WGS84.shp"));
    }

    @Test
    public void getMatchFlag_ValidCountyStateCoordinate_ReturnTrue() throws Exception {
        Assert.assertEquals(true, lookup.getMatchFlag(CORRECT_COUNTY,CORRECT_STATE,CORRECT_LATITUDE,CORRECT_LONGITUDE));
    }

    @Test
    public void  getMatchFlag_ValidCountyState_InvalidCoordinate_ReturnFalse() throws Exception {
        Assert.assertEquals(false, lookup.getMatchFlag(CORRECT_COUNTY,CORRECT_STATE,WRONG_LATITUDE,WRONG_LONGITUDE));
    }

    @Test
    public void getMatchFlag_InvalidCounty_ReturnFalse() throws Exception {
        Assert.assertNull(lookup.getMatchFlag(WRONG_COUNTY,CORRECT_STATE,CORRECT_LATITUDE,CORRECT_LONGITUDE));
    }

Tuesday, January 8, 2019

Python - Scrape APEX website ( wwv_flow.show)

Reference : https://www.slideshare.net/Enkitec/apex-behind-the-scenes

This kind of website posts several parameters that all share the name 'p_arg_names', 
and several that all share the name 'p_arg_values'. So when doing the POST, put the names and the values in two separate lists.



import requests
from bs4 import BeautifulSoup
from datetime import datetime
from datetime import timedelta

# Scrape an Oracle APEX page: load the page to obtain the session's hidden form fields,
# POST the query criteria to wwv_flow.show, then fetch the CSV view of the same page.
logurl = 'http://xxxxxxxxx/pls/apex/f?p=108:2700'
posturl = 'http://xxxxxxxx/pls/apex/wwv_flow.show'


def _hidden_field_value(soup, field_id):
    """Return the ``value`` attribute of the element with id ``field_id``.

    Raises ValueError when the element is missing, instead of the opaque
    AttributeError that ``soup.find(...).get`` raises on ``None``.
    """
    element = soup.find(id=field_id)
    if element is None:
        raise ValueError('Field %r not found in the page returned by %s' % (field_id, logurl))
    return element.get('value')


with requests.Session() as s:
    # Replace the session headers with a browser-like User-Agent. The literal was split
    # across two physical lines, which is a syntax error; adjacent string literals keep
    # it as one value.
    s.headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
        )
    }

    res = s.get(logurl)
    soup = BeautifulSoup(res.text, "lxml")

    # Query window: the last 7 days, formatted like 08-Jan-2019.
    # (A stray trailing comma used to turn p_date_start into a 1-tuple.)
    now = datetime.now()
    p_date_start = (now - timedelta(days=7)).date().strftime("%d-%b-%Y")
    p_date_end = now.strftime("%d-%b-%Y")
    permit_date_begin = str(p_date_start)
    permit_date_end = str(p_date_end)

    # Hidden fields of the loaded page, sent back with the POST.
    p_instance = _hidden_field_value(soup, 'pInstance')
    p_flow_id = _hidden_field_value(soup, 'pFlowId')
    p_flow_step_id = _hidden_field_value(soup, 'pFlowStepId')
    p_query_criteria = ('Permitted on or after: ' + permit_date_begin
                        + '; Permitted before or on: ' + permit_date_end)

    # The form repeats the 'p_arg_names' and 'p_arg_values' parameters; passing lists makes
    # requests send one parameter per element, with names and values paired by position.
    p_arg_names = ['P2700_PERMIT_DATE_BEGIN', 'P2700_PERMIT_DATE_END',
                   'P2700_QUERY_CRITERIA', 'P2700_QUERY_STOP', 'P2700_QUERY_DISPLAY']
    p_arg_values = [permit_date_begin, permit_date_end, p_query_criteria, 'N', '']

    values = {
        'p_flow_id': p_flow_id,
        'p_flow_step_id': p_flow_step_id,
        'p_instance': p_instance,
        'p_debug': '',
        # ...... (further form fields were elided here in the original post)
        "p_arg_names": p_arg_names,
        "p_arg_values": p_arg_values
    }

    posturl_final = 'http://xxxxxxxxx/pls/apex/f?p=108:2700:' + p_instance + ':CSV::::'
    # The POST response itself is not used; the request is made before fetching the CSV URL.
    r = s.post(posturl, data=values)
    r2 = s.get(posturl_final)
    soup3 = BeautifulSoup(r2.text, "lxml")

    print(soup3)

Wednesday, December 26, 2018

JAVA - Calculate distance between two wells' coordinates for Pad calculation

This is my first attempt at writing Java code, so it could certainly be better :)
This code loops through every API in one file to 
get the distance between each pair of them.


package com.nan.lens.wellcost;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

/**
 * Finds wells (identified by API number) whose coordinates lie close enough together to be
 * considered for the same pad.
 */
public class Pad {

    private final APIProvider provider;

    public Pad(APIProvider apiProvider) {
        this.provider = apiProvider;
    }

    /**
     * Builds a {@code Pad} from the bundled {@code /apiFile.csv} resource.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    public static Pad create() {
        try {
            return new Pad(new APIProvider("/apiFile.csv"));
        } catch (IOException e) {
            // Rethrown unchecked with the original exception kept as the cause.
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads API/latitude/longitude rows from a CSV resource and prints every pair of distinct
     * APIs that are closer than {@link #MAX_PAD_DISTANCE_KM}.
     */
    public static class APIProvider {
        /** Radius of the earth in km. */
        private static final double EARTH_RADIUS_KM = 6371;
        /** Distance threshold in km (76.2 m). */
        private static final double MAX_PAD_DISTANCE_KM = 0.0762;

        // API -> {latitude, longitude}
        private final Map<String, double[]> coordinatesByApi = new HashMap<>();

        /**
         * @param apiFile classpath location of a CSV whose columns are API, latitude, longitude
         * @throws IOException if the resource is missing or cannot be read
         * @throws NumberFormatException if a latitude or longitude cell is not a number
         */
        public APIProvider(String apiFile) throws IOException {
            loadCoordinates(apiFile);
            System.out.println(findNearbyPairs());
        }

        /** Reads every CSV record into {@link #coordinatesByApi}, closing the stream afterwards. */
        private void loadCoordinates(String apiFile) throws IOException {
            InputStream input = Pad.class.getResourceAsStream(apiFile);
            if (input == null) {
                throw new IOException("Resource not found on classpath: " + apiFile);
            }
            // Closing the reader also closes the underlying resource stream.
            try (InputStreamReader reader = new InputStreamReader(input)) {
                for (CSVRecord record : CSVFormat.EXCEL.parse(reader)) {
                    double latitude = Double.parseDouble(record.get(1));
                    double longitude = Double.parseDouble(record.get(2));
                    coordinatesByApi.put(record.get(0), new double[] {latitude, longitude});
                }
            }
        }

        /**
         * Compares every API with every other API. Each close pair is reported twice, once in
         * each order, under a random UUID key, as [first API, second API, distance in km].
         */
        private Map<String, List<String>> findNearbyPairs() {
            Map<String, List<String>> pairs = new HashMap<>();
            for (Map.Entry<String, double[]> first : coordinatesByApi.entrySet()) {
                for (Map.Entry<String, double[]> second : coordinatesByApi.entrySet()) {
                    // Compare keys by value; reference comparison (!=) only worked because
                    // both loops happened to see the same String instances.
                    if (first.getKey().equals(second.getKey())) {
                        continue;
                    }
                    double distanceKm = haversineKm(
                            first.getValue()[0], first.getValue()[1],
                            second.getValue()[0], second.getValue()[1]);
                    // The distance is compared as-is. It used to be squared first, which
                    // compared km^2 against a km threshold and stored the squared value.
                    if (distanceKm < MAX_PAD_DISTANCE_KM) {
                        List<String> pair = new ArrayList<>();
                        pair.add(first.getKey());
                        pair.add(second.getKey());
                        pair.add(String.valueOf(distanceKm));
                        pairs.put(UUID.randomUUID().toString(), pair);
                    }
                }
            }
            return pairs;
        }

        /** Great-circle distance in km between two latitude/longitude points given in degrees. */
        private static double haversineKm(double lat1, double lng1, double lat2, double lng2) {
            double latDistance = Math.toRadians(lat2 - lat1);
            double lonDistance = Math.toRadians(lng2 - lng1);
            double a = Math.sin(latDistance / 2) * Math.sin(latDistance / 2)
                    + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2))
                    * Math.sin(lonDistance / 2) * Math.sin(lonDistance / 2);
            double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
            return EARTH_RADIUS_KM * c;
        }
    }
}

Tuesday, December 4, 2018

Apache Kafka for beginners

https://www.cloudkarafka.com/blog/2016-11-30-part1-kafka-for-beginners-what-is-apache-kafka.html#


Apache Kafka and server concepts

Here are important concepts that you need to remember before we dig deeper into Apache Kafka - explained in one line.

  • Producer: Application that sends the messages.
  • Consumer: Application that receives the messages.
  • Message: Information that is sent from the producer to a consumer through Apache Kafka.
  • Connection: A connection is a TCP connection between your application and the Kafka broker.
  • Topic: A Topic is a category/feed name to which messages are stored and published.
  • Topic partition: Kafka topics are divided into a number of partitions, which allows you to split data across multiple brokers.
  • Replicas: A replica of a partition is a "backup" of a partition. Replicas never read or write data. They are used to prevent data loss.
  • Consumer Group: A consumer group includes the set of consumer processes that are subscribing to a specific topic.
  • Offset: The offset is a unique identifier of a record within a partition. It denotes the position of the consumer in the partition.
  • Node: A node is a single computer in the Apache Kafka cluster.
  • Cluster: A cluster is a group of nodes i.e., a group of computers.

Thursday, August 23, 2018

SQL - drop all temp tables

-- Builds a single batch of "drop table [name]" statements, separated by ';', from the rows of
-- tempdb..sysobjects selected below, then executes the batch.
declare @sql nvarchar(max)
-- @sql starts as NULL, so isnull(@sql+';', '') yields '' for the first row and
-- "<statements so far>;" for every following row. quotename() brackets each name.
select @sql = isnull(@sql+';', '') + 'drop table ' + quotename(name)
from tempdb..sysobjects
-- '#[^#]%': names whose first character is '#' and whose second character is not '#'.
-- NOTE(review): nothing here restricts the rows to the current session - verify which
-- tables this matches before running it on a shared server.
where name like '#[^#]%'
exec (@sql)

Tuesday, July 31, 2018

SQL - change exponential data to number

-- Shows each api value that contains an 'e' next to its whole-number form: the value is
-- first cast to FLOAT and the result is then converted to numeric(14,0).
SELECT api, CONVERT(numeric(14,0), CAST(api AS FLOAT)) as new_api
FROM [dbo].[tbl_PA_Perforated]
-- Only rows whose api contains the letter 'e' are selected.
where api like '%e%'