Showing posts with label SQL. Show all posts
Showing posts with label SQL. Show all posts

Thursday, September 21, 2023

PySpark SQL - write out records with time across multiple days

 Example:

From A:

To B:

Logic:

1. Use sequence() to list out the interval dates between the start and end date.

2. Use explode() to create duplicate rows with the interval dates.

3. Use lag() and row_number() to create extra dates columns for the next steps.

4. Use CASE statement to select different dates based on the row_no to indicate the first/last row.

Code:

tbl_explode as (
select StartDateTime
, explode(sequence(to_date(dateadd(day,1,StartDateTime)), to_date(dateadd(day,1,EndDateTime)), interval 1 day)) as interval_date
, EndDateTime
, *
from tbl_1
where datediff(day, StartDateTime,EndDateTime) > 0 or cast(StartDateTime as date) <> cast(EndDateTimeUTC as date)
),
tbl_time_split as (
select CASE WHEN row_no_min = 1 THEN StartDateTime
ELSE to_timestamp(lag_date) END AS new_StartDateTime
, CASE WHEN row_no_max = 1 THEN to_timestamp(EndDateTime)
ELSE to_timestamp(interval_date) END AS new_EndDateTime
, *
from(
select interval_date
, lag(interval_date, 1) OVER(PARTITION BY WO ORDER BY interval_date) as lag_date
, row_number() OVER (PARTITION BY WO, EventDate ORDER BY interval_date ) as row_no_min
, row_number() OVER (PARTITION BY WO, EventDate ORDER BY interval_date desc ) as row_no_max
, *
from tbl_explode
) d
)

Thursday, July 29, 2021

SQL Server - List all table_names, row_count, usage, view_names, view_definitions from database

--Table

SELECT

s.Name AS SchemaName,

t.Name AS TableName,

p.rows AS RowCounts,

CAST(ROUND((SUM(a.used_pages) / 128.00), 2) AS NUMERIC(36, 2)) AS Used_MB,

CAST(ROUND((SUM(a.total_pages) - SUM(a.used_pages)) / 128.00, 2) AS NUMERIC(36, 2)) AS Unused_MB,

CAST(ROUND((SUM(a.total_pages) / 128.00), 2) AS NUMERIC(36, 2)) AS Total_MB

FROM sys.tables t

INNER JOIN sys.indexes i ON t.OBJECT_ID = i.object_id

INNER JOIN sys.partitions p ON i.object_id = p.OBJECT_ID AND i.index_id = p.index_id

INNER JOIN sys.allocation_units a ON p.partition_id = a.container_id

INNER JOIN sys.schemas s ON t.schema_id = s.schema_id

GROUP BY t.Name, s.Name, p.Rows

ORDER BY 3 desc, 4


You can get data like this:








--View


SELECT v.name as VIEW_NAME, v.create_date , Replace(Replace(SM.definition,char(10),''),char(13),'') as view_definition

FROM sys.views V

     INNER JOIN  sys.sql_modules SM ON V.object_id  = SM.object_id 

     INNER JOIN sys.Objects SO ON SM.Object_id = SO.Object_id

WHERE SO.type = 'v'

Monday, November 2, 2020

SQL - DB summary on count of SP, views, functions, tables

 You can use T-SQL to Count Number Of Stored Procedures, Views, Tables or Functions in a Database by using the Database INFORMATION_SCHEMA view:


SELECT COUNT(*) AS TABLE_COUNT FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE='BASE TABLE'

 

/* Count Number Of Views In A Database */

SELECT COUNT(*) AS VIEW_COUNT FROM INFORMATION_SCHEMA.VIEWS

 

/* Count Number Of Stored Procedures In A Database */

SELECT COUNT(*) AS PROCEDURE_COUNT FROM INFORMATION_SCHEMA.ROUTINES WHERE ROUTINE_TYPE = 'PROCEDURE'

 


 /* Count Number Of Functions In A Database */

SELECT COUNT(*) AS FUNCTION_COUNT FROM INFORMATION_SCHEMA.ROUTINES WHERE ROUTINE_TYPE = 'FUNCTION'

Thursday, July 9, 2020

SQL - table-valued function to get all months between two date range


CREATE FUNCTION [dbo].[GetMonths](@StartDate DATETIME, @EndDate DATETIME)

RETURNS @MonthList TABLE(MonthValue VARCHAR(15) NOT NULL)

AS

BEGIN

    --Variable used to hold each new date value

    DECLARE @DateValue DATETIME

    --Start with the starting date in the range

    SET @DateValue=@StartDate

    --Load output table with the month part of each new date

    WHILE @DateValue <= @EndDate

    BEGIN

        INSERT INTO @MonthList(MonthValue)

        SELECT cast(@DateValue as date)

        --Move to the next month

        SET @DateValue=DATEADD(mm,1,@DateValue)

    END

    RETURN 

END

Monday, July 6, 2020

SQL - Track where a Stored Procedure is being used


 SELECT o.name
 FROM syscomments AS c
 INNER JOIN sysobjects AS o
 ON c.id = o.id
 WHERE c.text LIKE '%stored_procedure_name%';

Tuesday, June 16, 2020

SQL - Pivot and Unpivot


Just for me to remember :)


Pivot:

SELECT API,[1],[2],[3],[4],[5],[6],[7],[8],[9],[10]
FROM (
             SELECT API, row_no, decLiquidBoe/24 AS boed
     FROM #tbl_Liquid_Prod_All
     )d
PIVOT
(SUM(boed) FOR row_no IN ([1],[2],[3],[4],[5],[6],[7],[8],[9],[10])) AS pt


Unpivot:

 SELECT [API], [metric_date], [value]
 FROM #OH_Oil_Output
 UNPIVOT
 ([value] FOR metric_date IN ( [2020-01-01], [2020-02-01], [2020-03-01])) AS u

Thursday, February 13, 2020

SQL - Add constraint for an existing column


-----alter an existing column to have default value
  update  tbl_name
  set appended = getdate()

  alter table  tbl_name
  alter column appended DATETIME NOT NULL ;

  ALTER TABLE tbl_name
  ADD CONSTRAINT DF_Constraint DEFAULT GetDate() FOR appended;


-----get two digit month
 CONVERT(char(2), GetDate(), 101)

Monday, January 20, 2020

SQL - Dynamically unpivot data

 ----------dynamically unpivot production 
DECLARE @colsUnpivot AS NVARCHAR(MAX), @query AS NVARCHAR(MAX)

USE [XXXXXX];
with tbl_metric_date_name as(
SELECT      distinct '['+ c.name+ ']' AS 'ColumnName'
FROM        sys.columnsJOIN sys.tables  t ON c.object_id = t.object_id
WHERE       t.name ='OH_Oil_Output$' and c.name <> 'API'
)

SELECT distinct @colsUnpivot = STUFF((
SELECT ',' + ColumnName
FROM tbl_metric_date_name
FOR XML PATH ('')
                    ), 1, 1, '')
FROM tbl_metric_date_name;

set @query =
 'IF OBJECT_ID(''dbo.temp_OH_Oil_Prod'',''U'') IS NOT NULL DROP TABLE dbo.temp_OH_Oil_Prod
  SELECT API, cast(metric_date as date) as metric_date, [value]
  INTO dbo.temp_OH_Oil_Prod
  FROM [XXXXXX].[dbo].[OH_Oil_Output$] WITH(NOLOCK)
  Unpivot
  ([value] FOR metric_date in ( ' + @colsUnpivot + '))u' 

exec sp_executesql @query;

Wednesday, January 15, 2020

SQL - search for column/table name in database

USE the database you want to search;
SELECT      c.name  AS 'ColumnName' ,t.name AS 'TableName'
FROM        sys.columns c  JOIN sys.tables  t ON c.object_id = t.object_id
WHERE       (t.name like 'LA%' or t.name like '%luw%' or t.name like 'Louisana%' )
        and c.name in ('API','API_NUM','API num','Well_Serial_Num','Well_Serial_No','WELL_SERIA','Well Serial Number','WSN','Serial_no','Well_Serial','well Serial Num')
ORDER BY    TableName  ,ColumnName;

Monday, September 16, 2019

SQL - LEAD and LAG functions to calculate differences between different rows

Reference:
https://www.mssqltips.com/sqlservertutorial/9127/sql-server-window-functions-lead-and-lag/

Usage in industry:

For example, you have a lot of data from directional survey, and you have data measured depth, and inclination. You would like to know the first inclination point which turns or greater than 80, since that might be the place where the well becomes directional. And the length till that point would be vertical depth, total measured depth - depth till that point is lateral length. But you need to check the distance between that point (in my example P2) and its previous point (P1) to make sure that the distance is less that 200, then that confirms the data is right (at least the percentage of data accuracy is high). In this case, you can use Lag() function.




So, by using lag() function, you can do:

select api, max_MD,Lag(DS_MD,1) OVER(PARTITION BY api ORDER BY DS_MD) as previous_VD
from.....



SQL - Pivot row values as column names in select statement (Concatenate row values)

QUOTENAME()

Return a Unicode string with bracket delimiters (default):
or SELECT QUOTENAME('abcdef''()') returns (abcedf)

FOR XML PATH(''), TYPE
converts these rows into a single strongly-typed XML text node with the concatenated rows.

Adding FOR XML PATH to the end of a query allows you to output the results of the query as XML elements, with the element name contained in the PATH argument. For example, if we were to run the following statement:

SELECT ',' + name 
              FROM temp1
              FOR XML PATH ('')
By passing in a blank string (FOR XML PATH('')), we get the following instead:
,aaa,bbb,ccc,ddd,eee
Invoking the method value('.', 'NVARCHAR(MAX)') on that XML node converts the XML node to an nvarchar(MAX) string. 

STUFF()
Remove leading comma with STUFF.
The STUFF statement literally "stuffs” one string into another, replacing characters within the first string. We, however, are using it simply to remove the first character of the resultant list of values.
SELECT abc = STUFF((
            SELECT ',' + NAME
            FROM temp1
            FOR XML PATH('')
            ), 1, 1, '')
FROM temp1
So we end up with:
aaa,bbb,ccc,ddd,eee
Example:

IF OBJECT_ID('dbo.tbl_XXXX','U') IS NOT NULL DROP TABLE dbo.tbl_tbl_XXXX
DECLARE @cols AS NVARCHAR(MAX),@query AS NVARCHAR(MAX) ; SET @cols = STUFF(( SELECT ',' + QUOTENAME(data_refresh_date) FROM dbo.tbl_some_source GROUP BY data_refresh_date order by data_refresh_date FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'),1,1,'')
set @query = 'SELECT state,Play,attribute, metric_level,' + @cols + ' from ( SELECT DISTINCT A.state,A.Play,A.attribute,A.metric_level, A.data_refresh_date,
CONVERT(varchar,A.cnt*100/B.cnt,2) + ''%'' perc
FROM dbo.tbl_some_source A INNER JOIN dbo.tbl_some_source B ON A.State = B.State AND A.Play = B.play AND B.data_refresh_date = A.data_refresh_date AND B.attribute = ''API_Count'' ) x pivot ( max(perc) for data_refresh_date in (' + @cols + ') ) p '
execute('SELECT * INTO dbo.tbl_destination FROM (' + @query + ')x')

Explanation:

The xml part returns an xml result as:


Then by using dynamic SQL, we can use the result as column names to pivot data:

Other reference:



Thursday, August 1, 2019

SQL - Use of PARSENAME() function

Sometimes when we have pivoted value in excel or csv with title as 'Column 1','Column 2',...'Column n'. But in the meantime, those columns means the dates like 1980-01-01, 1980-02-01. After unpivot the data into sql , the data will be like this:
API               date_string Product
053-29070 Column 112 Oil
053-29070 Column 113 Oil
053-29070 Column 118 Oil
053-29070 Column 172 Oil
053-29070 Column 173 Oil
053-29070 Column 177 Oil
053-29070 Column 178 Oil
053-29070 Column 19 Oil
Then how do we know the corresponding year and month?

So here is the trick. Function PARSENAME() can get the specific part of a string delimited by a delimiter. so
for SELECT PARSENAME(12.3,1), you will get 3,
for SELECT PARSENAME(13%12,1), you will get 1

so by using this function together with remainder function, we can get the month of the data.
by using some tricks with number/12, we can get the year of the data.

date_string date_int month_indicator year_indicator date_int_plus
Column 1 1 1 0 2
Column 2 2 2 0 3
Column 3 3 3 0 4
Column 4 4 4 0 5
Column 5 5 5 0 6
Column 6 6 6 0 7
Column 7 7 7 0 8
Column 8 8 8 0 9
Column 9 9 9 0 10
Column 10 10 10 0 11
Column 11 11 11 0 12
Column 12 12 0 1 13
Column 13 13 1 1 14
Column 14 14 2 1 15
Column 15 15 3 1 16
Column 16 16 4 1 17
Column 17 17 5 1 18
Column 18 18 6 1 19
Column 19 19 7 1 20
Column 20 20 8 1 21
Column 21 21 9 1 22
Column 22 22 10 1 23
Column 23 23 11 1 24

--------------------------update production_month---------------------------------------
so we can   set  production_month = 
     case      when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 1 then 1
                  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 2 then 2
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 3 then 3
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 4 then 4
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 5 then 5
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 6 then 6
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 7 then 7
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 8 then 8
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 9 then 9
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 10 then 10
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 11 then 11
  when PARSENAME(cast(replace(date_string,'Column ','') as int)%12,1) = 0 then 12
  else null 
  end
--------------------------update production_year---------------------------------------
IF OBJECT_ID('tempdb..#tbl_prod') IS NOT NULL DROP TABLE #tbl_prod
select distinct cast(PARSENAME(cast(replace(date_string,'Column ','') as int)/12,1) as int) as year_indicator, cast(replace(date_string,'Column ','') as int) +1 as date_int_plus
into #tbl_prod
from XXXX with (nolock)

DECLARE @counter int = 0; 
WHILE @counter <= (select max(cast(replace(date_string,'Column ','') as int)) from XXXX)
BEGIN
update a
set production_year = 1980 + @counter
from XXXX a ,#tbl_prod b
where b.year_indicator = @counter and cast(replace(a.date_string,'Column ','') as int) =               
                  b.date_int_plus and a.api <> '' and a.volume <> 0 and a.volume is not null ;
SET @counter = @counter + 1;
END

--only column 1 is not assigned a year
update XXXX
set production_year = 1980
where date_string = 'Column 1' and api <> '' and volume is not null and volume <> 0;

Or just the easiest way. Use SELECT DATEADD(MONTH,+1,getdate()) to get the date.